Module `hxl.datatypes`

Utility functions for testing and normalising scalar-ish data types

Other modules in libhxl use these functions for consistent type checking, conversion, and normalisation.

Examples

s = hxl.datatypes.normalise("   This IS a String  ") # => "this is a string"
s = hxl.datatypes.normalise_space("   a  b\nc") # => "a b c"
s = hxl.datatypes.normalise_date("1/13/2020") # => "2020-01-13"
hxl.datatypes.is_empty("     ") # => True
type = hxl.datatypes.typeof("     ") # => "empty"

Author

David Megginson

License

Public Domain

Expand source code

"""Utility functions for testing and normalising scalar-ish data types

Other modules in libhxl use these functions for consistent type
checking, conversion, and normalisation.

Examples:
    ```
    s = hxl.datatypes.normalise("   This IS a String  ") # => "this is a string"
    s = hxl.datatypes.normalise_space("   a  b\\nc") # => "a b c"
    s = hxl.datatypes.normalise_date("1/13/2020") # => "2020-01-13"
    hxl.datatypes.is_empty("     ") # => True
    type = hxl.datatypes.typeof("     ") # => "empty"
    ```

Author:
    David Megginson

License:
    Public Domain

"""

import collections, datetime, dateutil.parser, json, logging, re, six, unidecode

# Public names exported by "from hxl.datatypes import *"
__all__ = ["TOKEN_PATTERN", "normalise", "typeof", "flatten", "is_truthy", "is_empty", "is_string", "is_token", "normalise_space", "normalise_string", "is_number", "normalise_number", "is_date", "normalise_date", "is_dict", "is_list"]

# Module-level logger, named after this module
logger = logging.getLogger(__name__)



########################################################################
# Constants
########################################################################

# Note: the pattern is not anchored, so it must be applied with a
# full-string match (is_token() uses re.fullmatch).
TOKEN_PATTERN = r'[A-Za-z][_0-9A-Za-z]*'
"""A regular expression matching a single string token.
"""

# One or more whitespace characters; normalise_space() replaces each
# match with a single space.
_WHITESPACE_PATTERN = re.compile(r'\s+', re.MULTILINE)

# A year on its own, or a year followed by a quarter ("2020Q2"), a week
# ("2020W07"), or "-month" with an optional "-day". Case-insensitive.
_ISO_DATE_PATTERN = re.compile(
    r'^(?P<year>[12]\d\d\d)(?:Q(?P<quarter>[1-4])|W(?P<week>\d\d?)|-(?P<month>\d\d?)(?:-(?P<day>\d\d?))?)?$',
    re.IGNORECASE
)

# "YYYY-MM-DD hh:mm:ss". The time of day is matched but not captured.
# The empty, optional "week" and "quarter" groups exist so that
# normalise_date() can read the same group names from either pattern.
_SQL_DATETIME_PATTERN = re.compile(
    r'^(?P<year>[12]\d\d\d)-(?P<month>\d\d?)-(?P<day>\d\d?) \d\d?:\d\d?:\d\d?(?P<week>)?(?P<quarter>)?$'
)

# Two default dates that differ in year, month, and day. normalise_date()
# parses with each of them and treats any field that differs between the
# two results as missing from the input.
_DEFAULT_DATE_1 = datetime.datetime(2015, 1, 1)

_DEFAULT_DATE_2 = datetime.datetime(2016, 3, 3)



########################################################################
# Functions
########################################################################

def normalise(value, col=None, dayfirst=True):
    """Normalise a value, optionally using the HXL hashtag as a type hint

    The value is tried as a date first, but only when a column is
    supplied and its hashtag is "#date". If that does not apply (or
    the date cannot be parsed), the value is tried as a number, and
    finally it falls back to a normalised string.

    Args:
        value: the value to normalise
        col (hxl.model.Column): an optional Column object associated with the value (for hints)
        dayfirst (bool): hint for whether to default to DD-MM-YYYY or MM-DD-YYYY when a date is ambiguous.

    Returns:
        The normalised value: a date string (from normalise_date()),
        an int or float (from normalise_number()), or a string (from
        normalise_string()).

    """
    # TODO add lat/lon

    treat_as_date = col and col.tag == '#date'
    if treat_as_date:
        try:
            return normalise_date(value, dayfirst=dayfirst)
        except ValueError:
            # not a usable date: continue with the generic conversions
            pass

    try:
        result = normalise_number(value)
    except ValueError:
        result = normalise_string(value)
    return result


def typeof(value, col=None):
    """Use duck typing and HXL hinting to guess the type of a value

    A value is reported as a date only when a column is supplied, its
    hashtag is "#date", and the value passes is_date(). The number
    check comes before the empty check.

    Args:
        value: the value to check
        col (hxl.model.Column): an optional Column object for hinting (via the hashtag)

    Returns:
        str: one of the strings "date", "number", "empty", or "string"

    """
    if col and col.tag == '#date' and is_date(value):
        return 'date'
    if is_number(value):
        return 'number'
    if is_empty(value):
        return 'empty'
    return 'string'


def flatten(value, use_json=True, separator=" | "):
    """Flatten potential lists and dictionaries

    If use_json is false, then remove hierarchies and create a single
    string with the items joined by the separator. Nested lists and
    dicts are flattened recursively with the same separator, and a
    dict contributes its keys rather than its values.

    Args:
        value: the value to flatten (may be a list or dict)
        use_json (bool): if True (default), encode top-level lists and dicts as JSON
        separator (str): the string to use as a separator, if use_json is false

    Returns:
        str: a string version of the value ('' for None)

    """
    # keep it simple for now
    if value is None:
        return ''
    if is_list(value) or is_dict(value):
        if use_json:
            return json.dumps(value)
        # honour the caller's separator at every nesting level
        return separator.join(flatten(item, False, separator) for item in value)
    return str(value)

    
def is_truthy(value):
    """Loosely check for a boolean-type true value

    The value is passed through normalise_string() and then compared
    with the accepted spellings "y", "yes", "t", "true", and "1".

    Args:
        value: the value to test

    Returns:
        bool: True if the value appears truthy

    """
    truthy_strings = ('y', 'yes', 't', 'true', '1')
    return normalise_string(value) in truthy_strings


def is_empty(value):
    """Test for a functionally-empty value.

    None, the empty string, and anything whose str() form consists
    only of whitespace count as empty; everything else does not.

    Args:
        value: value to test

    Returns:
        bool: True if the value is functionally empty

    """
    if value is None:
        return True
    return value == '' or str(value).isspace()


def is_string(value):
    """Test if a value is already a string

    This is a check on the actual data type (six.string_types); no
    conversion is attempted.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string type.

    """
    string_types = six.string_types
    return isinstance(value, string_types)


def is_token(value):
    """Test if a value is a valid HXL token

    A token is the string that may appear after "#" for a hashtag, or
    "+" for an attribute.  It must begin with a letter (A-Z, a-z),
    followed by letters, numbers, or underscore ("_"). Spaces,
    accented/non-Roman characters, and punctuation other than
    underscore are not allowed.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a token

    """
    if not is_string(value):
        return False
    # re.fullmatch returns a match object or None; convert to a real bool
    return re.fullmatch(TOKEN_PATTERN, value) is not None


def normalise_space(value):
    """Normalise whitespace only in a string

    The value is converted with str(), leading and trailing
    whitespace is stripped, and every internal run of whitespace
    (including line breaks) is replaced by one space character.

    Note: no other normalisation (date, number, case, etc) is applied.

    Args:
        value: the value to normalise

    Returns:
        str: a string representation of the original value, with
        whitespace normalised ('' if the value is empty according to is_empty()).

    """
    if is_empty(value):
        return ''
    text = str(value).strip().replace("\n", " ")
    return _WHITESPACE_PATTERN.sub(' ', text)


def normalise_string(value):
    """Normalise a string.

    None becomes ''; any other value is forced to a string with
    str(). The text is then passed through unidecode.unidecode(),
    has its whitespace normalised by normalise_space(), and is
    converted to lower case.

    Args:
        value: the string to normalise

    Returns:
        str: the normalised string

    """
    text = '' if value is None else str(value)
    transliterated = unidecode.unidecode(text)
    return normalise_space(transliterated).lower()


def is_number(value):
    """By duck typing, test if a value contains something recognisable as a number.

    The value must be convertible with float() and the result must be
    finite: NaN and the infinities are rejected, because
    normalise_number() cannot convert them either.

    Args:
        value: the value (string, int, float, etc) to test

    Returns:
        bool: True if usable as a number (via normalise_number())

    """
    try:
        n = float(value)
    except Exception:
        # best-effort duck typing: any conversion failure means "not a number"
        return False
    # n != n only for NaN
    return n == n and n not in (float('inf'), float('-inf'))


def normalise_number(value):
    """Attempt to convert a value to a number.

    Will convert to int type if it has no decimal places. NaN and
    infinite values are rejected, since they have no integer form.

    Args:
        value: the value (string, int, float, etc) to convert.

    Returns:
        int: an integer value if there are no decimal places
        float: a floating point value if there were decimal places

    Raises:
        ValueError: if the value cannot be converted

    """
    try:
        n = float(value)
        whole = int(n)  # raises for NaN and infinity
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not turned into a ValueError
        raise ValueError("Cannot convert to number: {}".format(value))
    return whole if n == whole else n


def is_date(value):
    """Test if a value contains something recognisable as a date.

    Args:
        value: the value (string, etc) to test

    Returns:
        bool: True if usable as a date (via normalise_date())

    """
    try:
        normalise_date(value)
    except (ValueError, OverflowError, OSError):
        # OverflowError/OSError can come from converting an out-of-range
        # numeric timestamp; such a value is simply not a date
        return False
    return True


def normalise_date(value, dayfirst=True):
    """Normalise a string as a date.

    This function will take a variety of different date formats and
    attempt to convert them to an ISO 8601 date, such as
    "2020-06-01". It also will use a non-ISO format for quarter years,
    such as "2020Q2".

    The conversions are attempted in this order:

    1. A value accepted by int(): above 100000 it is read as seconds
       since the Unix epoch (converted in the local timezone); from
       2200 to 100000 it is read as a day count where day 1 is
       1970-01-01. Smaller values fall through to the next steps.
    2. ISO-style dates (YYYY, YYYY-MM, YYYY-MM-DD, YYYYQn, YYYYWnn)
       and SQL-style datetimes (YYYY-MM-DD hh:mm:ss).
    3. All-numeric strings of 10 or more characters, read as a UTC
       epoch timestamp in seconds, milliseconds (13+ characters),
       microseconds (16+), or nanoseconds (19+).
    4. Anything else is passed to dateutil.parser.parse().

    Args:
        value: the value to normalise as a date
        dayfirst (bool): if the date is ambiguous, assume the day comes before the month

    Returns:
        str: the date in ISO 8601 format or the extended quarters syntax
    
    Raises:
        ValueError: if the value cannot be parsed as a date

    """

    def make_date_string(year, quarter=None, month=None, week=None, day=None):
        """Validate the date parts and format the most specific form available."""
        if quarter:
            # *not* real ISO 8601
            quarter = int(quarter)
            if quarter < 1 or quarter > 4:
                raise ValueError("Illegal Quarter number: {}".format(quarter))
            return '{:04d}Q{:01d}'.format(int(year), int(quarter))
        elif week:
            week = int(week)
            if week < 1 or week > 53:
                raise ValueError("Illegal week number: {}".format(week))
            return '{:04d}W{:02d}'.format(int(year), int(week))
        elif month:
            month = int(month)
            if month < 1 or month > 12:
                raise ValueError("Illegal month number: {}".format(month))
            if day:
                day = int(day)
                if day < 1 or day > 31 or (month in [4, 6, 9, 11] and day > 30) or (month==2 and day>29):
                    raise ValueError("Illegal day {} for month {}".format(day, month))
                return '{:04d}-{:02d}-{:02d}'.format(int(year), int(month), int(day))
            else:
                return '{:04d}-{:02d}'.format(int(year), int(month))
        else:
            return '{:04d}'.format(int(year))

    # If it's a positive integer, try a quick conversion to days or seconds since epoch
    try:
        interval = int(value)
        if interval > 100000: # assume seconds for a big number
            d = datetime.datetime.fromtimestamp(interval)
            return d.strftime("%Y-%m-%d")
        elif interval >= 2200: # assume days (cut out for years)
            d = datetime.datetime(1970, 1, 1) + datetime.timedelta(days=interval-1)
            return d.strftime("%Y-%m-%d")
    except (ValueError, TypeError, OverflowError, OSError,):
        # OverflowError/OSError: the number is too large to be a timestamp in
        # seconds; it may still be a milli/micro/nanosecond timestamp (below)
        pass

    # First, try our quick ISO date pattern, extended to support quarter notation
    value = normalise_space(value)
    result = _ISO_DATE_PATTERN.match(value)
    if not result:
        result = _SQL_DATETIME_PATTERN.match(value)
    if result:
        return make_date_string(
            result.group('year'),
            quarter=result.group('quarter'),
            month=result.group('month'),
            week=result.group('week'),
            day=result.group('day')
        )

    # Next, check for a timestamp, which will crash the datetime module
    if value.isnumeric() and len(value) >= 10:
        digits = len(value)
        # choose the unit from the number of digits (largest first)
        if digits >= 19:
            divisor = 1000000000 # nanoseconds
        elif digits >= 16:
            divisor = 1000000 # microseconds
        elif digits >= 13:
            divisor = 1000 # milliseconds
        else:
            divisor = None # seconds
        try:
            timestamp = int(value)
            if divisor is not None:
                timestamp = timestamp / divisor
            d = datetime.datetime.utcfromtimestamp(timestamp)
        except (OverflowError, OSError):
            # keep the documented contract: out-of-range input is a ValueError
            raise ValueError("Timestamp out of range: {}".format(value))
        return d.date().isoformat()

    # revert to full date parsing
    # we parse the date twice, to detect any default values Python might have filled in
    try:
        date1 = dateutil.parser.parse(value, default=_DEFAULT_DATE_1, dayfirst=dayfirst)
        date2 = dateutil.parser.parse(value, default=_DEFAULT_DATE_2, dayfirst=dayfirst)
    except OverflowError:
        # dateutil raises OverflowError for numbers too large for the platform
        raise ValueError("Cannot parse as a date: {}".format(value))
    day = date1.day if date1.day==date2.day else None
    month = date1.month if date1.month==date2.month else None
    year = date1.year if date1.year==date2.year else None

    # do some quick validation
    if year is None:
        if month is not None:
            year = datetime.datetime.now().year
        else:
            raise ValueError("Will not provide default year unless month is present: {}".format(value))
    if month is None and day is not None:
        raise ValueError("Will not provide default month: {}".format(value))

    return make_date_string(year=year, month=month, day=day)


def is_dict(value):
    """Test if a value is a Python dict or other mapping.

    The check is against collections.abc.Mapping.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a Python dict or similar map.

    """
    mapping_type = collections.abc.Mapping
    return isinstance(value, mapping_type)


def is_list(value):
    """Test if a value is a Python sequence (other than a string)

    The check is against collections.abc.Sequence, with string types
    (six.string_types) excluded.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a non-string sequence.

    """
    if isinstance(value, six.string_types):
        return False
    return isinstance(value, collections.abc.Sequence)

Global variables

var TOKEN_PATTERN: A regular expression matching a single string token.

Functions

def flatten(value, use_json=True, separator=' | ')

Flatten potential lists and dictionaries

If use_json is false, then remove hierarchies and create a single string with the items separated by " | "; for a dict, the keys are used rather than the values.

Args

value: the value to flatten (may be a list)
use_json : bool: if True (default), encode top-level lists as JSON
separator : str: the string to use as a separator, if use_json is false

Returns

str: a string version of the value

Expand source code

def flatten(value, use_json=True, separator=" | "):
    """Flatten potential lists and dictionaries

    If use_json is false, then remove hierarchies and create a single
    string with the items joined by the separator. Nested lists and
    dicts are flattened recursively with the same separator, and a
    dict contributes its keys rather than its values.

    Args:
        value: the value to flatten (may be a list or dict)
        use_json (bool): if True (default), encode top-level lists and dicts as JSON
        separator (str): the string to use as a separator, if use_json is false

    Returns:
        str: a string version of the value ('' for None)

    """
    # keep it simple for now
    if value is None:
        return ''
    if is_list(value) or is_dict(value):
        if use_json:
            return json.dumps(value)
        # honour the caller's separator at every nesting level
        return separator.join(flatten(item, False, separator) for item in value)
    return str(value)

def is_date(value)

Test if a value contains something recognisable as a date.

Args

value: the value (string, etc) to test

Returns

True if usable as a date

Expand source code

def is_date(value):
    """Test if a value contains something recognisable as a date.

    Args:
        value: the value (string, etc) to test

    Returns:
        bool: True if usable as a date (via normalise_date())

    """
    try:
        normalise_date(value)
    except (ValueError, OverflowError, OSError):
        # OverflowError/OSError can come from converting an out-of-range
        # numeric timestamp; such a value is simply not a date
        return False
    return True

def is_dict(value)

Test if a value is a Python dict.

Args

value: the value to test

Returns

bool: True if the value is a Python dict or similar map.

Expand source code

def is_dict(value):
    """Test if a value is a Python dict or other mapping.

    The check is against collections.abc.Mapping.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a Python dict or similar map.

    """
    mapping_type = collections.abc.Mapping
    return isinstance(value, mapping_type)

def is_empty(value)

Test for a functionally-empty value.

None, empty string, or whitespace only counts as empty; anything else doesn't.

Args

value: value to test

Returns

bool: True if the value is functionally empty

Expand source code

def is_empty(value):
    """Test for a functionally-empty value.

    None, the empty string, and anything whose str() form consists
    only of whitespace count as empty; everything else does not.

    Args:
        value: value to test

    Returns:
        bool: True if the value is functionally empty

    """
    if value is None:
        return True
    return value == '' or str(value).isspace()

def is_list(value)

Test if a value is a Python sequence (other than a string)

Args

value: the value to test

Returns

bool: True if the value is a non-string sequence.

Expand source code

def is_list(value):
    """Test if a value is a Python sequence (other than a string)

    The check is against collections.abc.Sequence, with string types
    (six.string_types) excluded.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a non-string sequence.

    """
    if isinstance(value, six.string_types):
        return False
    return isinstance(value, collections.abc.Sequence)

def is_number(value)

By duck typing, test if a value contains something recognisable as a number.

Args

value: the value (string, int, float, etc) to test

Returns

bool: True if usable as a number (via normalise_number())

Expand source code

def is_number(value):
    """By duck typing, test if a value contains something recognisable as a number.

    The value must be convertible with float() and the result must be
    finite: NaN and the infinities are rejected, because
    normalise_number() cannot convert them either.

    Args:
        value: the value (string, int, float, etc) to test

    Returns:
        bool: True if usable as a number (via normalise_number())

    """
    try:
        n = float(value)
    except Exception:
        # best-effort duck typing: any conversion failure means "not a number"
        return False
    # n != n only for NaN
    return n == n and n not in (float('inf'), float('-inf'))

def is_string(value)

Test if a value is already a string

Looks for an actual string data type.

Args

value: the value to test

Returns

bool: True if the value is a string type.

Expand source code

def is_string(value):
    """Test if a value is already a string

    This is a check on the actual data type (six.string_types); no
    conversion is attempted.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a string type.

    """
    string_types = six.string_types
    return isinstance(value, string_types)

def is_token(value)

Test if a value is a valid HXL token

A token is the string that may appear after "#" for a hashtag, or "+" for an attribute. It must begin with a letter (A-Z, a-z), followed by letters, numbers, or underscore ("_"). Spaces, accented/non-Roman characters, and punctuation other than underscore are not allowed.

Args

value: the value to test

Returns

bool: True if the value is a token

Expand source code

def is_token(value):
    """Test if a value is a valid HXL token

    A token is the string that may appear after "#" for a hashtag, or
    "+" for an attribute.  It must begin with a letter (A-Z, a-z),
    followed by letters, numbers, or underscore ("_"). Spaces,
    accented/non-Roman characters, and punctuation other than
    underscore are not allowed.

    Args:
        value: the value to test

    Returns:
        bool: True if the value is a token

    """
    if not is_string(value):
        return False
    # re.fullmatch returns a match object or None; convert to a real bool
    return re.fullmatch(TOKEN_PATTERN, value) is not None

def is_truthy(value)

Loosely check for a boolean-type true value

Accepts values such as "1", "yes", "t", "true", etc

Args

value: the value to test

Returns

bool: True if the value appears truthy

Expand source code

def is_truthy(value):
    """Loosely check for a boolean-type true value

    The value is passed through normalise_string() and then compared
    with the accepted spellings "y", "yes", "t", "true", and "1".

    Args:
        value: the value to test

    Returns:
        bool: True if the value appears truthy

    """
    truthy_strings = ('y', 'yes', 't', 'true', '1')
    return normalise_string(value) in truthy_strings

def normalise(value, col=None, dayfirst=True)

Intelligently normalise a value, optionally using the HXL hashtag and attributes for hints

Attempt to guess the value's type using duck typing and (optionally) hints from the HXL hashtag, then produce a standard representation of a date or number (if appropriate), or a string with whitespace normalised.

Args

value: the value to convert to a normalised string
col : Column: an optional Column object associated with the string (for hints)
dayfirst : bool: hint for whether to default to DD-MM-YYYY or MM-DD-YYYY when ambiguous.

Returns

str: A normalised string version of the value provided.

Expand source code

def normalise(value, col=None, dayfirst=True):
    """Normalise a value, optionally using the HXL hashtag as a type hint

    The value is tried as a date first, but only when a column is
    supplied and its hashtag is "#date". If that does not apply (or
    the date cannot be parsed), the value is tried as a number, and
    finally it falls back to a normalised string.

    Args:
        value: the value to normalise
        col (hxl.model.Column): an optional Column object associated with the value (for hints)
        dayfirst (bool): hint for whether to default to DD-MM-YYYY or MM-DD-YYYY when a date is ambiguous.

    Returns:
        The normalised value: a date string (from normalise_date()),
        an int or float (from normalise_number()), or a string (from
        normalise_string()).

    """
    # TODO add lat/lon

    treat_as_date = col and col.tag == '#date'
    if treat_as_date:
        try:
            return normalise_date(value, dayfirst=dayfirst)
        except ValueError:
            # not a usable date: continue with the generic conversions
            pass

    try:
        result = normalise_number(value)
    except ValueError:
        result = normalise_string(value)
    return result

def normalise_date(value, dayfirst=True)

Normalise a string as a date.

This function will take a variety of different date formats and attempt to convert them to an ISO 8601 date, such as "2020-06-01". It also will use a non-ISO format for quarter years, such as "2020Q2".

Args

value: the value to normalise as a date
dayfirst : bool: if the date is ambiguous, assume the day comes before the month

Returns

str: the date in ISO 8601 format or the extended quarters syntax

Raises

ValueError: if the value cannot be parsed as a date

Expand source code

def normalise_date(value, dayfirst=True):
    """Normalise a string as a date.

    This function will take a variety of different date formats and
    attempt to convert them to an ISO 8601 date, such as
    "2020-06-01". It also will use a non-ISO format for quarter years,
    such as "2020Q2".

    The conversions are attempted in this order:

    1. A value accepted by int(): above 100000 it is read as seconds
       since the Unix epoch (converted in the local timezone); from
       2200 to 100000 it is read as a day count where day 1 is
       1970-01-01. Smaller values fall through to the next steps.
    2. ISO-style dates (YYYY, YYYY-MM, YYYY-MM-DD, YYYYQn, YYYYWnn)
       and SQL-style datetimes (YYYY-MM-DD hh:mm:ss).
    3. All-numeric strings of 10 or more characters, read as a UTC
       epoch timestamp in seconds, milliseconds (13+ characters),
       microseconds (16+), or nanoseconds (19+).
    4. Anything else is passed to dateutil.parser.parse().

    Args:
        value: the value to normalise as a date
        dayfirst (bool): if the date is ambiguous, assume the day comes before the month

    Returns:
        str: the date in ISO 8601 format or the extended quarters syntax
    
    Raises:
        ValueError: if the value cannot be parsed as a date

    """

    def make_date_string(year, quarter=None, month=None, week=None, day=None):
        """Validate the date parts and format the most specific form available."""
        if quarter:
            # *not* real ISO 8601
            quarter = int(quarter)
            if quarter < 1 or quarter > 4:
                raise ValueError("Illegal Quarter number: {}".format(quarter))
            return '{:04d}Q{:01d}'.format(int(year), int(quarter))
        elif week:
            week = int(week)
            if week < 1 or week > 53:
                raise ValueError("Illegal week number: {}".format(week))
            return '{:04d}W{:02d}'.format(int(year), int(week))
        elif month:
            month = int(month)
            if month < 1 or month > 12:
                raise ValueError("Illegal month number: {}".format(month))
            if day:
                day = int(day)
                if day < 1 or day > 31 or (month in [4, 6, 9, 11] and day > 30) or (month==2 and day>29):
                    raise ValueError("Illegal day {} for month {}".format(day, month))
                return '{:04d}-{:02d}-{:02d}'.format(int(year), int(month), int(day))
            else:
                return '{:04d}-{:02d}'.format(int(year), int(month))
        else:
            return '{:04d}'.format(int(year))

    # If it's a positive integer, try a quick conversion to days or seconds since epoch
    try:
        interval = int(value)
        if interval > 100000: # assume seconds for a big number
            d = datetime.datetime.fromtimestamp(interval)
            return d.strftime("%Y-%m-%d")
        elif interval >= 2200: # assume days (cut out for years)
            d = datetime.datetime(1970, 1, 1) + datetime.timedelta(days=interval-1)
            return d.strftime("%Y-%m-%d")
    except (ValueError, TypeError, OverflowError, OSError,):
        # OverflowError/OSError: the number is too large to be a timestamp in
        # seconds; it may still be a milli/micro/nanosecond timestamp (below)
        pass

    # First, try our quick ISO date pattern, extended to support quarter notation
    value = normalise_space(value)
    result = _ISO_DATE_PATTERN.match(value)
    if not result:
        result = _SQL_DATETIME_PATTERN.match(value)
    if result:
        return make_date_string(
            result.group('year'),
            quarter=result.group('quarter'),
            month=result.group('month'),
            week=result.group('week'),
            day=result.group('day')
        )

    # Next, check for a timestamp, which will crash the datetime module
    if value.isnumeric() and len(value) >= 10:
        digits = len(value)
        # choose the unit from the number of digits (largest first)
        if digits >= 19:
            divisor = 1000000000 # nanoseconds
        elif digits >= 16:
            divisor = 1000000 # microseconds
        elif digits >= 13:
            divisor = 1000 # milliseconds
        else:
            divisor = None # seconds
        try:
            timestamp = int(value)
            if divisor is not None:
                timestamp = timestamp / divisor
            d = datetime.datetime.utcfromtimestamp(timestamp)
        except (OverflowError, OSError):
            # keep the documented contract: out-of-range input is a ValueError
            raise ValueError("Timestamp out of range: {}".format(value))
        return d.date().isoformat()

    # revert to full date parsing
    # we parse the date twice, to detect any default values Python might have filled in
    try:
        date1 = dateutil.parser.parse(value, default=_DEFAULT_DATE_1, dayfirst=dayfirst)
        date2 = dateutil.parser.parse(value, default=_DEFAULT_DATE_2, dayfirst=dayfirst)
    except OverflowError:
        # dateutil raises OverflowError for numbers too large for the platform
        raise ValueError("Cannot parse as a date: {}".format(value))
    day = date1.day if date1.day==date2.day else None
    month = date1.month if date1.month==date2.month else None
    year = date1.year if date1.year==date2.year else None

    # do some quick validation
    if year is None:
        if month is not None:
            year = datetime.datetime.now().year
        else:
            raise ValueError("Will not provide default year unless month is present: {}".format(value))
    if month is None and day is not None:
        raise ValueError("Will not provide default month: {}".format(value))

    return make_date_string(year=year, month=month, day=day)

def normalise_number(value)

Attempt to convert a value to a number.

Will convert to int type if it has no decimal places.

Args

value: the value (string, int, float, etc) to convert.

Returns

int: an integer value if there are no decimal places
float: a floating point value if there were decimal places

Raises

ValueError: if the value cannot be converted

Expand source code

def normalise_number(value):
    """Attempt to convert a value to a number.

    Will convert to int type if it has no decimal places. NaN and
    infinite values are rejected, since they have no integer form.

    Args:
        value: the value (string, int, float, etc) to convert.

    Returns:
        int: an integer value if there are no decimal places
        float: a floating point value if there were decimal places

    Raises:
        ValueError: if the value cannot be converted

    """
    try:
        n = float(value)
        whole = int(n)  # raises for NaN and infinity
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not turned into a ValueError
        raise ValueError("Cannot convert to number: {}".format(value))
    return whole if n == whole else n

def normalise_space(value)

Normalise whitespace only in a string

This method will convert the input value to a string first, then remove any leading or trailing whitespace, and replace all sequences of internal whitespace (including line breaks) with a single space character.

Note: this does not perform other normalisations (date, etc), but simply calls the str() function on the value provided.

Args

value: the value to normalise

Returns

str: a string representation of the original value, with whitespace normalised.

Expand source code

def normalise_space(value):
    """Normalise whitespace only in a string

    The value is converted with str(), leading and trailing
    whitespace is stripped, and every internal run of whitespace
    (including line breaks) is replaced by one space character.

    Note: no other normalisation (date, number, case, etc) is applied.

    Args:
        value: the value to normalise

    Returns:
        str: a string representation of the original value, with
        whitespace normalised ('' if the value is empty according to is_empty()).

    """
    if is_empty(value):
        return ''
    text = str(value).strip().replace("\n", " ")
    return _WHITESPACE_PATTERN.sub(' ', text)

def normalise_string(value)

Normalise a string.

Remove all leading and trailing whitespace. Convert to lower case. Replace all internal whitespace (including lineends) with a single space. Replace None with ''.

The input value will be forced to a string using str()

Args

value: the string to normalise

Returns

str: the normalised string

Expand source code

def normalise_string(value):
    """Normalise a string.

    None becomes ''; any other value is forced to a string with
    str(). The text is then passed through unidecode.unidecode(),
    has its whitespace normalised by normalise_space(), and is
    converted to lower case.

    Args:
        value: the string to normalise

    Returns:
        str: the normalised string

    """
    text = '' if value is None else str(value)
    transliterated = unidecode.unidecode(text)
    return normalise_space(transliterated).lower()

def typeof(value, col=None)

Use duck typing and HXL hinting to guess the type of a value

Args

value: the value to check
col : Column: an optional Column object for hinting (via the hashtag and attributes)

Returns

str: one of the strings "date", "number", "empty", or "string"

Expand source code

def typeof(value, col=None):
    """Use duck typing and HXL hinting to guess the type of a value

    A value is reported as a date only when a column is supplied, its
    hashtag is "#date", and the value passes is_date(). The number
    check comes before the empty check.

    Args:
        value: the value to check
        col (hxl.model.Column): an optional Column object for hinting (via the hashtag)

    Returns:
        str: one of the strings "date", "number", "empty", or "string"

    """
    if col and col.tag == '#date' and is_date(value):
        return 'date'
    if is_number(value):
        return 'number'
    if is_empty(value):
        return 'empty'
    return 'string'