# coding: utf-8
#
# Copyright 2019 Geocom Informatik AG / VertiGIS
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module that contains helper functions to improve text handling and formatting.
"""
import typing as _tp
from numbers import Number as _Number
from datetime import datetime as _dt
import gpf.common.const as _const
import gpf.common.validate as _vld
def get_alphachars(text: str) -> str:
    """
    Returns all alphabetic characters in string *text* in a new (concatenated) string.

    A character counts as alphabetic when :meth:`str.isalpha` returns ``True`` for it.
    This means that non-ASCII letters (e.g. ``'é'``) are kept as well, not just [a-zA-Z].

    Example:

        >>> get_alphachars('Test123')
        'Test'

    :param text: The string to search.
    :raises TypeError: If *text* does not pass the ``is_text`` check (the check itself and the raising
                       are delegated to ``gpf.common.validate``).
    """
    error_msg = "'text' attribute must be a string (got {!r})".format(text)
    _vld.pass_if(_vld.is_text(text), TypeError, error_msg)

    letters = [char for char in text if char.isalpha()]
    # CHAR_EMPTY is presumably the empty string, i.e. the letters are glued together without a separator.
    return _const.CHAR_EMPTY.join(letters)
def get_digits(text: str) -> str:
    """
    Returns all digit characters in string *text* in a new (concatenated) **string**.

    A character counts as a digit when :meth:`str.isdigit` returns ``True`` for it.
    Besides the decimal digits 0-9, this also matches characters such as superscript digits (``'²'``),
    which :func:`int` is not able to parse.

    Example:

        >>> get_digits('Test123')
        '123'
        >>> int(get_digits('The answer is 42'))
        42

    :param text: The string to search.
    :raises TypeError: If *text* does not pass the ``is_text`` check (the check itself and the raising
                       are delegated to ``gpf.common.validate``).
    """
    error_msg = "'text' attribute must be a string (got {!r})".format(text)
    _vld.pass_if(_vld.is_text(text), TypeError, error_msg)

    digits = [char for char in text if char.isdigit()]
    # CHAR_EMPTY is presumably the empty string, i.e. the digits are glued together without a separator.
    return _const.CHAR_EMPTY.join(digits)
def to_str(value: _tp.Any, encoding: str = _const.ENC_UTF8) -> str:
    """
    This function behaves similar to the built-in :func:`str` method: it converts any value into a string.
    However, if *value* is a ``bytes`` object, it will be decoded according to the specified *encoding*.

    :param value:    The value to convert to string.
    :param encoding: The encoding to use when value is a ``bytes`` object.

    .. note::   By default, the encoding is UTF-8, unless the user specified something else.
                If this function fails to decode the value into ``str`` using the specified encoding
                (i.e. a ``UnicodeError`` occurs), the default encoding from ``gpf.common.const`` is used
                instead (which often is cp1252).
                For this fallback case, the 'replace' error handler is used, which means that it will not
                raise an error if it fails. Bytes that fail to decode will be replaced by the
                Unicode replacement character (U+FFFD).

    .. note::   Only real ``bytes`` objects are decoded. Any other value (including ``bytearray``)
                is simply passed to the built-in :func:`str`.
    """
    # Everything that is not a bytes object gets the regular str() treatment.
    if not isinstance(value, bytes):
        return str(value)

    try:
        decoded = value.decode(encoding)
    except UnicodeError:
        # Strict decoding failed: retry with the fallback encoding and never raise on bad bytes.
        decoded = value.decode(_const.ENC_DEFAULT, errors='replace')
    return decoded
def to_bytes(value: _tp.Any, encoding: str = _const.ENC_UTF8) -> bytes:
    """
    This function behaves similar to the built-in :func:`bytes` method: it converts a value into a ``bytes`` object.
    However, if *value* is a ``str``, it will be encoded according to the specified *encoding*.

    :param value:    The value to convert to bytes.
    :param encoding: The encoding to use when value is a ``str``.

    .. note::   By default, the encoding is UTF-8, unless the user specified something else.
                If this function fails to encode the value into bytes using the specified encoding
                (i.e. a ``UnicodeError`` occurs), the default encoding from ``gpf.common.const`` is used
                instead (which often is cp1252).
                For this fallback case, the 'replace' error handler is used, which means that it will not
                raise an error if it fails. Characters that fail to encode will be replaced by a question mark.

    .. note::   Values that are not a ``str`` are passed to the built-in :func:`bytes` as-is.
                Be aware that ``bytes(n)`` for an integer *n* returns *n* null bytes, and that values
                which :func:`bytes` does not support (e.g. ``None`` or a float) raise a ``TypeError``.

    .. warning::    Python 3 only!
    """
    # Everything that is not text gets the regular bytes() treatment.
    if not isinstance(value, str):
        return bytes(value)

    try:
        encoded = value.encode(encoding)
    except UnicodeError:
        # Strict encoding failed: retry with the fallback encoding and never raise on bad characters.
        encoded = value.encode(_const.ENC_DEFAULT, errors='replace')
    return encoded
def to_repr(value: _tp.Any, encoding: str = _const.ENC_UTF8) -> str:
    """
    This function behaves similar to the built-in :func:`repr` method: it converts any value into its representation.
    However, if *value* is a ``bytes`` or ``bytearray`` object, it will first be decoded by :func:`to_str`
    using the specified *encoding* (defaults to UTF-8), after which the representation of the resulting
    string is returned.
    This means that the representation of such an object will not have the 'b' prefix
    (or ``bytearray(...)`` wrapper) anymore.

    Decoding is delegated to :func:`to_str`, which falls back to another encoding with the 'replace'
    error handler when the specified *encoding* fails, so that undecodable bytes do not raise an error.

    :param value:    The value for which to get its representation.
    :param encoding: The encoding to use when value is a ``bytes`` or ``bytearray`` object.
    """
    if isinstance(value, bytearray):
        # to_str() only decodes genuine ``bytes`` objects. A bytearray would be handed to str() instead,
        # which yields the text "bytearray(b'...')" rather than the decoded content.
        value = bytes(value)

    if isinstance(value, (str, bytes)):
        return repr(to_str(value, encoding))
    return repr(value)
def capitalize(text: str) -> str:
    """
    Function that works similar to the built-in string method :func:`str.capitalize`,
    except that it only makes the first character uppercase, and leaves the other characters unchanged.

    An empty string is returned as an empty string.

    :param text: The string to capitalize.
    :raises TypeError: If *text* does not pass the ``is_text`` check (the check itself and the raising
                       are delegated to ``gpf.common.validate``).
    """
    error_msg = "'text' attribute must be a string (got {!r})".format(text)
    _vld.pass_if(_vld.is_text(text), TypeError, error_msg)

    # Slicing (instead of indexing) also covers the empty string and single characters.
    head, tail = text[:1], text[1:]
    return head.upper() + tail
def unquote(text: str) -> str:
    """
    Strips all leading and trailing quote characters from a text string and returns it.

    Single quotes, double quotes and backticks are removed from both ends of the string,
    regardless of how many there are or whether they match. Quotes inside the text are left alone.

    :param text: The string to strip.
    :raises TypeError: If *text* does not pass the ``is_text`` check (the check itself and the raising
                       are delegated to ``gpf.common.validate``).
    """
    error_msg = "'text' attribute must be a string (got {!r})".format(text)
    _vld.pass_if(_vld.is_text(text), TypeError, error_msg)

    quote_chars = '\'"`'
    return text.strip(quote_chars)