Module hxl.datatypes

Utility functions for testing and normalising scalar-ish data types

Other modules in libhxl use these functions for consistent type checking, conversion, and normalisation.

Examples

s = hxl.datatypes.normalise("   This IS a String  ") # => "this is a string"
s = hxl.datatypes.normalise_space("   a  b\nc") # => "a b c"
s = hxl.datatypes.normalise_date("1/13/2020") # => "2020-01-13"
hxl.datatypes.is_empty("     ") # => True
type = hxl.datatypes.typeof("     ") # => "empty"

Author

David Megginson

License

Public Domain

Expand source code
"""Utility functions for testing and normalising scalar-ish data types

Other modules in libhxl use these functions for consistent type
checking, conversion, and normalisation.

Examples:
    ```
    s = hxl.datatypes.normalise("   This IS a String  ") # => "this is a string"
    s = hxl.datatypes.normalise_space("   a  b\\nc") # => "a b c"
    s = hxl.datatypes.normalise_date("1/13/2020") # => "2020-01-13"
    hxl.datatypes.is_empty("     ") # => True
    type = hxl.datatypes.typeof("     ") # => "empty"
    ```

Author:
    David Megginson

License:
    Public Domain

"""

import collections
import collections.abc
import datetime
import json
import logging
import re

import dateutil.parser
import six
import unidecode

# Public names exported by "from hxl.datatypes import *"
__all__ = ["TOKEN_PATTERN", "normalise", "typeof", "flatten", "is_truthy", "is_empty", "is_string", "is_token", "normalise_space", "normalise_string", "is_number", "normalise_number", "is_date", "normalise_date", "is_dict", "is_list"]

# Module-level logger, named after this module
logger = logging.getLogger(__name__)



########################################################################
# Constants
########################################################################

# Uncompiled pattern string: one ASCII letter followed by any number of
# ASCII letters, digits, or underscores. is_token() applies it with
# re.fullmatch(), so no anchors are needed here.
TOKEN_PATTERN = r'[A-Za-z][_0-9A-Za-z]*'
"""A regular expression matching a single string token.
"""

# One or more whitespace characters of any kind (including line breaks);
# used by normalise_space() to collapse runs of whitespace.
_WHITESPACE_PATTERN = re.compile(r'\s+', re.MULTILINE)

# A four-digit year starting with 1 or 2, optionally followed by exactly one of:
#   "Q" + quarter (1-4), "W" + one or two week digits, or
#   "-" + one or two month digits, itself optionally followed by "-" + one or two day digits.
# Matching is case-insensitive, so "2020q2" is accepted as well as "2020Q2".
_ISO_DATE_PATTERN = re.compile(
    r'^(?P<year>[12]\d\d\d)(?:Q(?P<quarter>[1-4])|W(?P<week>\d\d?)|-(?P<month>\d\d?)(?:-(?P<day>\d\d?))?)?$',
    re.IGNORECASE
)

# "YYYY-MM-DD HH:MM:SS" style datetimes; the time part is matched but not captured.
# The empty "week" and "quarter" groups exist only so that normalise_date() can
# call .group('week') / .group('quarter') on a match from either pattern.
_SQL_DATETIME_PATTERN = re.compile(
    r'^(?P<year>[12]\d\d\d)-(?P<month>\d\d?)-(?P<day>\d\d?) \d\d?:\d\d?:\d\d?(?P<week>)?(?P<quarter>)?$'
)

# Two default dates that differ in year, month, and day. normalise_date() parses
# with each in turn: any field that differs between the two results was filled in
# from the default rather than found in the input.
_DEFAULT_DATE_1 = datetime.datetime(2015, 1, 1)

_DEFAULT_DATE_2 = datetime.datetime(2016, 3, 3)



########################################################################
# Functions
########################################################################

def normalise(value, col=None, dayfirst=True):
    """Normalise a value, optionally using a HXL column for type hints.

    The value is tried as a date first (only when the column's hashtag
    is "#date"), then as a number, and finally falls back to a
    normalised string.

    Args:
        value: the value to normalise
        col (hxl.model.Column): an optional Column object used for hints; only its tag is examined
        dayfirst (bool): for ambiguous dates, prefer DD-MM-YYYY (True) over MM-DD-YYYY (False)

    Returns:
        The result of normalise_date() (a string) for a parseable value in
        a #date column; otherwise the result of normalise_number() (int or
        float) if the value is numeric; otherwise the result of
        normalise_string().

    """
    # TODO add lat/lon

    date_hint = col and col.tag == '#date'
    if date_hint:
        try:
            return normalise_date(value, dayfirst=dayfirst)
        except ValueError:
            # not parseable as a date: treat it like any other value
            pass

    try:
        result = normalise_number(value)
    except ValueError:
        result = normalise_string(value)
    return result


def typeof(value, col=None):
    """Use duck typing and HXL hinting to guess the type of a value.

    "date" is reported only when the column's hashtag is "#date" and
    the value passes is_date(); the number check comes next, then the
    emptiness check.

    Args:
        value: the value to check
        col (hxl.model.Column): an optional Column object for hinting; only its tag is examined

    Returns:
        str: one of the strings "date", "number", "empty", or "string"

    """
    if col and col.tag == '#date' and is_date(value):
        return 'date'
    if is_number(value):
        return 'number'
    if is_empty(value):
        return 'empty'
    return 'string'


def flatten(value, use_json=True, separator=" | "):
    """Flatten potential lists and dictionaries into a single string.

    With use_json true, a list or dict is encoded as JSON. Otherwise
    any hierarchy is removed: every item is flattened recursively and
    the results are joined with the separator. Iterating a dict yields
    its keys, so dict keys (not values) appear in the output.

    Args:
        value: the value to flatten (may be a list or dict)
        use_json (bool): if True (default), encode top-level lists and dicts as JSON
        separator (str): the string placed between items when use_json is false

    Returns:
        str: a string version of the value ('' for None)

    """
    if value is None:
        return ''
    if is_list(value) or is_dict(value):
        if use_json:
            return json.dumps(value)
        # pass the separator down so that nested items use it as well
        return separator.join(flatten(item, False, separator) for item in value)
    return str(value)

    
def is_truthy(value):
    """Loosely check whether a value looks like a boolean "true".

    The value is passed through normalise_string() and then compared
    with the accepted spellings: "y", "yes", "t", "true", and "1".

    Args:
        value: the value to test

    Returns:
        bool: True if the value appears truthy

    """
    accepted = ('y', 'yes', 't', 'true', '1')
    token = normalise_string(value)
    return token in accepted


def is_empty(value):
    """Test whether a value is functionally empty.

    None, the empty string, and anything whose str() form consists
    only of whitespace count as empty; everything else does not.

    Args:
        value: value to test

    Returns:
        bool: True if the value is functionally empty

    """
    if value is None:
        return True
    if value == '':
        return True
    # anything else is empty only if it stringifies to pure whitespace
    return str(value).isspace()


def is_string(value):
    """Test whether a value already has a string data type.

    This is a strict type check against six.string_types; no
    conversion is attempted.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string type.

    """
    string_types = six.string_types
    return isinstance(value, string_types)


def is_token(value):
    """Test if a value is a valid HXL token

    A token is the string that may appear after "#" for a hashtag, or
    "+" for an attribute.  It must begin with a letter (A-Z, a-z),
    followed by letters, numbers, or underscore ("_"). Internal
    spaces, accented/non-Roman characters, and punctuation other than
    underscore are not allowed.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string that matches TOKEN_PATTERN in full

    """
    # compare with None so callers get a real bool, not a Match object
    return is_string(value) and re.fullmatch(TOKEN_PATTERN, value) is not None


def normalise_space(value):
    """Normalise only the whitespace in a value.

    The value is converted with str(), leading and trailing whitespace
    is stripped, and every internal run of whitespace (including line
    breaks) becomes a single space character.

    No other normalisation (dates, numbers, case) is performed.

    Args:
        value: the value to normalise

    Returns:
        str: the string form of the value with whitespace normalised ('' if the value is empty)

    """
    if is_empty(value):
        return ''
    text = str(value).strip()
    text = text.replace("\n", " ")
    return _WHITESPACE_PATTERN.sub(' ', text)


def normalise_string(value):
    """Normalise a value as a string.

    None becomes ''; anything else is forced to a string with str().
    The text is then passed through unidecode.unidecode(), whitespace
    is normalised with normalise_space() (leading/trailing whitespace
    removed, internal runs including line ends collapsed to one
    space), and the result is converted to lower case.

    Args:
        value: the value to normalise

    Returns:
        str: the normalised string

    """
    text = '' if value is None else str(value)
    transliterated = unidecode.unidecode(text)
    return normalise_space(transliterated).lower()


def is_number(value):
    """By duck typing, test if a value contains something recognisable as a number.

    The check is simply whether float() accepts the value, so numeric
    strings (with optional surrounding whitespace) count as well as
    int and float objects. Note that float() also accepts "nan" and
    "inf", which normalise_number() rejects.

    Args:
        value: the value (string, int, float, etc) to test

    Returns:
        bool: True if float() can convert the value

    """
    try:
        float(value)
    except Exception:
        # Any ordinary conversion failure means "not a number". A bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True


def normalise_number(value):
    """Attempt to convert a value to a number.

    The value is converted with float(); if the result has no
    fractional part it is returned as an int instead. NaN and
    infinity cannot be converted to int and are therefore rejected.

    Args:
        value: the value (string, int, float, etc) to convert.

    Returns:
        int: an integer value if there are no decimal places
        float: a floating point value if there were decimal places

    Raises:
        ValueError: if the value cannot be converted

    """
    try:
        number = float(value)
        whole = int(number)
    except Exception as err:
        # Report every ordinary failure (TypeError, ValueError,
        # OverflowError, ...) as ValueError, but let KeyboardInterrupt
        # and SystemExit propagate untouched.
        raise ValueError("Cannot convert to number: {}".format(value)) from err
    return whole if number == whole else number


def is_date(value):
    """Test if a value contains something recognisable as a date.

    The value counts as a date if normalise_date() (with its default
    dayfirst setting) can convert it.

    Args:
        value: the value (string, etc) to test

    Returns:
        bool: True if usable as a date

    """
    try:
        normalise_date(value)
    except (ValueError, OverflowError, OSError):
        # normalise_date() hands numeric input to the datetime module,
        # which reports out-of-range timestamps with OverflowError or
        # OSError rather than ValueError; a predicate should say False
        # in those cases, not crash.
        return False
    return True


def normalise_date(value, dayfirst=True):
    """Normalise a value as a date.

    This function will take a variety of different date formats and
    attempt to convert them to an ISO 8601 date, such as
    "2020-06-01". It also will use a non-ISO format for quarter years,
    such as "2020Q2".

    The following are tried in order:

    1. An integer (or integer-like value). Numbers above 100000 are
       read as seconds since the Unix epoch, and numbers from 2200 to
       100000 as a day count in which day 1 is 1970-01-01. Smaller
       numbers are left for the later steps, so a bare year such as
       "2020" stays a year.
       NOTE: the seconds conversion uses the local time zone, while
       step 3 uses UTC.
    2. ISO-style strings (YYYY, YYYY-MM, YYYY-MM-DD, YYYYQn, YYYYWnn)
       and SQL-style "YYYY-MM-DD HH:MM:SS" datetimes.
    3. All-numeric strings of 10 or more characters, read as an epoch
       timestamp in seconds (10-12 digits), milliseconds (13-15),
       microseconds (16-18), or nanoseconds (19 or more).
    4. Anything else that dateutil can parse. Fields missing from the
       input are detected by parsing against two different defaults;
       a missing year is replaced by the current year when a month
       is present.

    Args:
        value: the value to normalise as a date
        dayfirst (bool): if the date is ambiguous, assume the day comes before the month

    Returns:
        str: the date in ISO 8601 format or the extended quarters syntax

    Raises:
        ValueError: if the value cannot be parsed as a date

    """

    def make_date_string(year, quarter=None, month=None, week=None, day=None):
        """Validate the date components and format the most specific form available."""
        year = int(year)
        if quarter:
            # *not* real ISO 8601
            quarter = int(quarter)
            if quarter < 1 or quarter > 4:
                raise ValueError("Illegal Quarter number: {}".format(quarter))
            return '{:04d}Q{:01d}'.format(year, quarter)
        if week:
            week = int(week)
            if week < 1 or week > 53:
                raise ValueError("Illegal week number: {}".format(week))
            return '{:04d}W{:02d}'.format(year, week)
        if month:
            month = int(month)
            if month < 1 or month > 12:
                raise ValueError("Illegal month number: {}".format(month))
            if not day:
                return '{:04d}-{:02d}'.format(year, month)
            day = int(day)
            try:
                # let the calendar decide: this covers month lengths and
                # leap years (e.g. 29 February in a non-leap year)
                datetime.date(year, month, day)
            except ValueError:
                raise ValueError("Illegal day {} for month {}".format(day, month)) from None
            return '{:04d}-{:02d}-{:02d}'.format(year, month, day)
        return '{:04d}'.format(year)

    # If it's a positive integer, try a quick conversion to days or seconds since epoch
    try:
        interval = int(value)
        if interval > 100000: # assume seconds for a big number
            d = datetime.datetime.fromtimestamp(interval)
            return d.strftime("%Y-%m-%d")
        elif interval >= 2200: # assume days (cut out for years)
            d = datetime.datetime(1970, 1, 1) + datetime.timedelta(days=interval-1)
            return d.strftime("%Y-%m-%d")
    except (ValueError, TypeError, OverflowError, OSError):
        # Not an integer, or too large to be seconds since the epoch
        # (fromtimestamp() signals that with ValueError, OverflowError,
        # or OSError depending on the size and platform): keep trying.
        pass

    # First, try our quick ISO date pattern, extended to support quarter notation
    value = normalise_space(value)
    result = _ISO_DATE_PATTERN.match(value)
    if not result:
        result = _SQL_DATETIME_PATTERN.match(value)
    if result:
        return make_date_string(
            result.group('year'),
            quarter=result.group('quarter'),
            month=result.group('month'),
            week=result.group('week'),
            day=result.group('day')
        )

    # Next, check for a timestamp, which will crash the datetime module
    if value.isnumeric() and len(value) >= 10:
        digits = len(value)
        if digits >= 19:
            scale = 1000000000 # nanoseconds
        elif digits >= 16:
            scale = 1000000 # microseconds
        elif digits >= 13:
            scale = 1000 # milliseconds
        else:
            scale = 1 # seconds
        try:
            # only the date is needed, so whole seconds are enough
            d = datetime.datetime.fromtimestamp(int(value) // scale, tz=datetime.timezone.utc)
        except (ValueError, OverflowError, OSError) as err:
            raise ValueError("Cannot convert timestamp to a date: {}".format(value)) from err
        return d.date().isoformat()

    # revert to full date parsing
    # we parse the date twice, to detect any default values Python might have filled in
    try:
        date1 = dateutil.parser.parse(value, default=_DEFAULT_DATE_1, dayfirst=dayfirst)
        date2 = dateutil.parser.parse(value, default=_DEFAULT_DATE_2, dayfirst=dayfirst)
    except OverflowError as err:
        # dateutil reports oversized numbers with OverflowError
        raise ValueError("Cannot parse as a date: {}".format(value)) from err
    day = date1.day if date1.day==date2.day else None
    month = date1.month if date1.month==date2.month else None
    year = date1.year if date1.year==date2.year else None

    # do some quick validation
    if year is None:
        if month is not None:
            year = datetime.datetime.now().year
        else:
            raise ValueError("Will not provide default year unless month is present: {}".format(value))
    if month is None and day is not None:
        raise ValueError("Will not provide default month: {}".format(value))

    return make_date_string(year=year, month=month, day=day)


def is_dict(value):
    """Test whether a value is a dict or another kind of mapping.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is an instance of collections.abc.Mapping.

    """
    mapping_type = collections.abc.Mapping
    return isinstance(value, mapping_type)


def is_list(value):
    """Test whether a value is a sequence other than a string.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a collections.abc.Sequence but not a string type.

    """
    if isinstance(value, six.string_types):
        # strings are sequences too, but never count as lists here
        return False
    return isinstance(value, collections.abc.Sequence)

Global variables

var TOKEN_PATTERN

A regular expression matching a single string token.

Functions

def flatten(value, use_json=True, separator=' | ')

Flatten potential lists and dictionaries

If use_json is false, remove any hierarchy and create a single flat string with the items separated by " | ". For dictionaries, the keys are used rather than the values.

Args

value
the value to flatten (may be a list)
use_json : bool
if True (default), encode top-level lists as JSON
separator : str
the string to use as a separator, if use_json is false

Returns

str
a string version of the value
Expand source code
def flatten(value, use_json=True, separator=" | "):
    """Flatten potential lists and dictionaries into a single string.

    With use_json true, a list or dict is encoded as JSON. Otherwise
    any hierarchy is removed: every item is flattened recursively and
    the results are joined with the separator. Iterating a dict yields
    its keys, so dict keys (not values) appear in the output.

    Args:
        value: the value to flatten (may be a list or dict)
        use_json (bool): if True (default), encode top-level lists and dicts as JSON
        separator (str): the string placed between items when use_json is false

    Returns:
        str: a string version of the value ('' for None)

    """
    if value is None:
        return ''
    if is_list(value) or is_dict(value):
        if use_json:
            return json.dumps(value)
        # pass the separator down so that nested items use it as well
        return separator.join(flatten(item, False, separator) for item in value)
    return str(value)
def is_date(value)

Test if a value contains something recognisable as a date.

Args

value
the value (string, etc) to test

Returns

True if usable as a date

Expand source code
def is_date(value):
    """Test if a value contains something recognisable as a date.

    The value counts as a date if normalise_date() (with its default
    dayfirst setting) can convert it.

    Args:
        value: the value (string, etc) to test

    Returns:
        bool: True if usable as a date

    """
    try:
        normalise_date(value)
    except (ValueError, OverflowError, OSError):
        # normalise_date() hands numeric input to the datetime module,
        # which reports out-of-range timestamps with OverflowError or
        # OSError rather than ValueError; a predicate should say False
        # in those cases, not crash.
        return False
    return True
def is_dict(value)

Test if a value is a Python dict.

Args

value
the value to test

Returns

bool
True if the value is a Python dict or similar map.
Expand source code
def is_dict(value):
    """Test whether a value is a dict or another kind of mapping.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is an instance of collections.abc.Mapping.

    """
    mapping_type = collections.abc.Mapping
    return isinstance(value, mapping_type)
def is_empty(value)

Test for a functionally-empty value.

None, empty string, or whitespace only counts as empty; anything else doesn't.

Args

value
value to test

Returns

bool
True if the value is functionally empty
Expand source code
def is_empty(value):
    """Test whether a value is functionally empty.

    None, the empty string, and anything whose str() form consists
    only of whitespace count as empty; everything else does not.

    Args:
        value: value to test

    Returns:
        bool: True if the value is functionally empty

    """
    if value is None:
        return True
    if value == '':
        return True
    # anything else is empty only if it stringifies to pure whitespace
    return str(value).isspace()
def is_list(value)

Test if a value is a Python sequence (other than a string)

Args

value
the value to test

Returns

bool
True if the value is a non-string sequence.
Expand source code
def is_list(value):
    """Test whether a value is a sequence other than a string.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a collections.abc.Sequence but not a string type.

    """
    if isinstance(value, six.string_types):
        # strings are sequences too, but never count as lists here
        return False
    return isinstance(value, collections.abc.Sequence)
def is_number(value)

By duck typing, test if a value contains something recognisable as a number.

Args

value
the value (string, int, float, etc) to test

Returns

bool
True if usable as a number (via normalise_number())
Expand source code
def is_number(value):
    """By duck typing, test if a value contains something recognisable as a number.

    The check is simply whether float() accepts the value, so numeric
    strings (with optional surrounding whitespace) count as well as
    int and float objects. Note that float() also accepts "nan" and
    "inf", which normalise_number() rejects.

    Args:
        value: the value (string, int, float, etc) to test

    Returns:
        bool: True if float() can convert the value

    """
    try:
        float(value)
    except Exception:
        # Any ordinary conversion failure means "not a number". A bare
        # except would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True
def is_string(value)

Test if a value is already a string

Looks for an actual string data type.

Args

value
the value to test

Returns

bool
True if the value is a string type.
Expand source code
def is_string(value):
    """Test whether a value already has a string data type.

    This is a strict type check against six.string_types; no
    conversion is attempted.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string type.

    """
    string_types = six.string_types
    return isinstance(value, string_types)
def is_token(value)

Test if a value is a valid HXL token

A token is the string that may appear after "#" for a hashtag, or "+" for an attribute. It must begin with a letter (A-Z, a-z), followed by letters, numbers, or underscore ("_"). Internal spaces, accented/non-Roman characters, and punctuation other than underscore are not allowed.

Args

value
the value to test

Returns

bool
True if the value is a token
Expand source code
def is_token(value):
    """Test if a value is a valid HXL token

    A token is the string that may appear after "#" for a hashtag, or
    "+" for an attribute.  It must begin with a letter (A-Z, a-z),
    followed by letters, numbers, or underscore ("_"). Internal
    spaces, accented/non-Roman characters, and punctuation other than
    underscore are not allowed.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string that matches TOKEN_PATTERN in full

    """
    # compare with None so callers get a real bool, not a Match object
    return is_string(value) and re.fullmatch(TOKEN_PATTERN, value) is not None
def is_truthy(value)

Loosely check for a boolean-type true value

Accepts values such as "1", "yes", "t", "true", etc

Args

value
the value to test

Returns

bool
True if the value appears truthy
Expand source code
def is_truthy(value):
    """Loosely check whether a value looks like a boolean "true".

    The value is passed through normalise_string() and then compared
    with the accepted spellings: "y", "yes", "t", "true", and "1".

    Args:
        value: the value to test

    Returns:
        bool: True if the value appears truthy

    """
    accepted = ('y', 'yes', 't', 'true', '1')
    token = normalise_string(value)
    return token in accepted
def normalise(value, col=None, dayfirst=True)

Intelligently normalise a value, optionally using the HXL hashtag and attributes for hints

Attempt to guess the value's type using duck typing and (optionally) hints from the HXL hashtag, then produce a string containing a standard representation of a date or number (if appropriate), or a string with whitespace normalised.

Args

value
the value to convert to a normalised string
col : Column
an optional Column object associated with the string (for hints)
dayfirst : bool
hint for whether to default to DD-MM-YYYY or MM-DD-YYYY when ambiguous.

Returns

str
A normalised string version of the value provided.
Expand source code
def normalise(value, col=None, dayfirst=True):
    """Normalise a value, optionally using a HXL column for type hints.

    The value is tried as a date first (only when the column's hashtag
    is "#date"), then as a number, and finally falls back to a
    normalised string.

    Args:
        value: the value to normalise
        col (hxl.model.Column): an optional Column object used for hints; only its tag is examined
        dayfirst (bool): for ambiguous dates, prefer DD-MM-YYYY (True) over MM-DD-YYYY (False)

    Returns:
        The result of normalise_date() (a string) for a parseable value in
        a #date column; otherwise the result of normalise_number() (int or
        float) if the value is numeric; otherwise the result of
        normalise_string().

    """
    # TODO add lat/lon

    date_hint = col and col.tag == '#date'
    if date_hint:
        try:
            return normalise_date(value, dayfirst=dayfirst)
        except ValueError:
            # not parseable as a date: treat it like any other value
            pass

    try:
        result = normalise_number(value)
    except ValueError:
        result = normalise_string(value)
    return result
def normalise_date(value, dayfirst=True)

Normalise a string as a date.

This function will take a variety of different date formats and attempt to convert them to an ISO 8601 date, such as "2020-06-01". It also will use a non-ISO format for quarter years, such as "2020Q2".

Args

value
the value to normalise as a date
dayfirst : bool
if the date is ambiguous, assume the day comes before the month

Returns

str
the date in ISO 8601 format or the extended quarters syntax

Raises

ValueError
if the value cannot be parsed as a date
Expand source code
def normalise_date(value, dayfirst=True):
    """Normalise a value as a date.

    This function will take a variety of different date formats and
    attempt to convert them to an ISO 8601 date, such as
    "2020-06-01". It also will use a non-ISO format for quarter years,
    such as "2020Q2".

    The following are tried in order:

    1. An integer (or integer-like value). Numbers above 100000 are
       read as seconds since the Unix epoch, and numbers from 2200 to
       100000 as a day count in which day 1 is 1970-01-01. Smaller
       numbers are left for the later steps, so a bare year such as
       "2020" stays a year.
       NOTE: the seconds conversion uses the local time zone, while
       step 3 uses UTC.
    2. ISO-style strings (YYYY, YYYY-MM, YYYY-MM-DD, YYYYQn, YYYYWnn)
       and SQL-style "YYYY-MM-DD HH:MM:SS" datetimes.
    3. All-numeric strings of 10 or more characters, read as an epoch
       timestamp in seconds (10-12 digits), milliseconds (13-15),
       microseconds (16-18), or nanoseconds (19 or more).
    4. Anything else that dateutil can parse. Fields missing from the
       input are detected by parsing against two different defaults;
       a missing year is replaced by the current year when a month
       is present.

    Args:
        value: the value to normalise as a date
        dayfirst (bool): if the date is ambiguous, assume the day comes before the month

    Returns:
        str: the date in ISO 8601 format or the extended quarters syntax

    Raises:
        ValueError: if the value cannot be parsed as a date

    """

    def make_date_string(year, quarter=None, month=None, week=None, day=None):
        """Validate the date components and format the most specific form available."""
        year = int(year)
        if quarter:
            # *not* real ISO 8601
            quarter = int(quarter)
            if quarter < 1 or quarter > 4:
                raise ValueError("Illegal Quarter number: {}".format(quarter))
            return '{:04d}Q{:01d}'.format(year, quarter)
        if week:
            week = int(week)
            if week < 1 or week > 53:
                raise ValueError("Illegal week number: {}".format(week))
            return '{:04d}W{:02d}'.format(year, week)
        if month:
            month = int(month)
            if month < 1 or month > 12:
                raise ValueError("Illegal month number: {}".format(month))
            if not day:
                return '{:04d}-{:02d}'.format(year, month)
            day = int(day)
            try:
                # let the calendar decide: this covers month lengths and
                # leap years (e.g. 29 February in a non-leap year)
                datetime.date(year, month, day)
            except ValueError:
                raise ValueError("Illegal day {} for month {}".format(day, month)) from None
            return '{:04d}-{:02d}-{:02d}'.format(year, month, day)
        return '{:04d}'.format(year)

    # If it's a positive integer, try a quick conversion to days or seconds since epoch
    try:
        interval = int(value)
        if interval > 100000: # assume seconds for a big number
            d = datetime.datetime.fromtimestamp(interval)
            return d.strftime("%Y-%m-%d")
        elif interval >= 2200: # assume days (cut out for years)
            d = datetime.datetime(1970, 1, 1) + datetime.timedelta(days=interval-1)
            return d.strftime("%Y-%m-%d")
    except (ValueError, TypeError, OverflowError, OSError):
        # Not an integer, or too large to be seconds since the epoch
        # (fromtimestamp() signals that with ValueError, OverflowError,
        # or OSError depending on the size and platform): keep trying.
        pass

    # First, try our quick ISO date pattern, extended to support quarter notation
    value = normalise_space(value)
    result = _ISO_DATE_PATTERN.match(value)
    if not result:
        result = _SQL_DATETIME_PATTERN.match(value)
    if result:
        return make_date_string(
            result.group('year'),
            quarter=result.group('quarter'),
            month=result.group('month'),
            week=result.group('week'),
            day=result.group('day')
        )

    # Next, check for a timestamp, which will crash the datetime module
    if value.isnumeric() and len(value) >= 10:
        digits = len(value)
        if digits >= 19:
            scale = 1000000000 # nanoseconds
        elif digits >= 16:
            scale = 1000000 # microseconds
        elif digits >= 13:
            scale = 1000 # milliseconds
        else:
            scale = 1 # seconds
        try:
            # only the date is needed, so whole seconds are enough
            d = datetime.datetime.fromtimestamp(int(value) // scale, tz=datetime.timezone.utc)
        except (ValueError, OverflowError, OSError) as err:
            raise ValueError("Cannot convert timestamp to a date: {}".format(value)) from err
        return d.date().isoformat()

    # revert to full date parsing
    # we parse the date twice, to detect any default values Python might have filled in
    try:
        date1 = dateutil.parser.parse(value, default=_DEFAULT_DATE_1, dayfirst=dayfirst)
        date2 = dateutil.parser.parse(value, default=_DEFAULT_DATE_2, dayfirst=dayfirst)
    except OverflowError as err:
        # dateutil reports oversized numbers with OverflowError
        raise ValueError("Cannot parse as a date: {}".format(value)) from err
    day = date1.day if date1.day==date2.day else None
    month = date1.month if date1.month==date2.month else None
    year = date1.year if date1.year==date2.year else None

    # do some quick validation
    if year is None:
        if month is not None:
            year = datetime.datetime.now().year
        else:
            raise ValueError("Will not provide default year unless month is present: {}".format(value))
    if month is None and day is not None:
        raise ValueError("Will not provide default month: {}".format(value))

    return make_date_string(year=year, month=month, day=day)
def normalise_number(value)

Attempt to convert a value to a number.

Will convert to int type if it has no decimal places.

Args

value
the value (string, int, float, etc) to convert.

Returns

int
an integer value if there are no decimal places
float
a floating point value if there were decimal places

Raises

ValueError
if the value cannot be converted
Expand source code
def normalise_number(value):
    """Attempt to convert a value to a number.

    The value is converted with float(); if the result has no
    fractional part it is returned as an int instead. NaN and
    infinity cannot be converted to int and are therefore rejected.

    Args:
        value: the value (string, int, float, etc) to convert.

    Returns:
        int: an integer value if there are no decimal places
        float: a floating point value if there were decimal places

    Raises:
        ValueError: if the value cannot be converted

    """
    try:
        number = float(value)
        whole = int(number)
    except Exception as err:
        # Report every ordinary failure (TypeError, ValueError,
        # OverflowError, ...) as ValueError, but let KeyboardInterrupt
        # and SystemExit propagate untouched.
        raise ValueError("Cannot convert to number: {}".format(value)) from err
    return whole if number == whole else number
def normalise_space(value)

Normalise whitespace only in a string

This method will convert the input value to a string first, then remove any leading or trailing whitespace, and replace all sequences of internal whitespace (including line breaks) with a single space character.

Note: this does not perform other normalisations (date, etc), but simply calls the str() function on the value provided.

Args

value
the value to normalise

Returns

str
a string representation of the original value, with whitespace normalised.
Expand source code
def normalise_space(value):
    """Normalise only the whitespace in a value.

    The value is converted with str(), leading and trailing whitespace
    is stripped, and every internal run of whitespace (including line
    breaks) becomes a single space character.

    No other normalisation (dates, numbers, case) is performed.

    Args:
        value: the value to normalise

    Returns:
        str: the string form of the value with whitespace normalised ('' if the value is empty)

    """
    if is_empty(value):
        return ''
    text = str(value).strip()
    text = text.replace("\n", " ")
    return _WHITESPACE_PATTERN.sub(' ', text)
def normalise_string(value)

Normalise a string.

Remove all leading and trailing whitespace. Convert to lower case. Replace all internal whitespace (including line ends) with a single space. Replace None with ''.

The input value will be forced to a string using str()

Args

value
the string to normalise

Returns

str
the normalised string
Expand source code
def normalise_string(value):
    """Normalise a value as a string.

    None becomes ''; anything else is forced to a string with str().
    The text is then passed through unidecode.unidecode(), whitespace
    is normalised with normalise_space() (leading/trailing whitespace
    removed, internal runs including line ends collapsed to one
    space), and the result is converted to lower case.

    Args:
        value: the value to normalise

    Returns:
        str: the normalised string

    """
    text = '' if value is None else str(value)
    transliterated = unidecode.unidecode(text)
    return normalise_space(transliterated).lower()
def typeof(value, col=None)

Use duck typing and HXL hinting to guess the type of a value

Args

value
the value to check
col : Column
an optional Column object for hinting (via the hashtag and attributes)

Returns

str
one of the strings "date", "number", "empty", or "string"
Expand source code
def typeof(value, col=None):
    """Use duck typing and HXL hinting to guess the type of a value.

    "date" is reported only when the column's hashtag is "#date" and
    the value passes is_date(); the number check comes next, then the
    emptiness check.

    Args:
        value: the value to check
        col (hxl.model.Column): an optional Column object for hinting; only its tag is examined

    Returns:
        str: one of the strings "date", "number", "empty", or "string"

    """
    if col and col.tag == '#date' and is_date(value):
        return 'date'
    if is_number(value):
        return 'number'
    if is_empty(value):
        return 'empty'
    return 'string'