"""Utility function relating to numerical spans."""

import re
import enum
import math
# from preprocess.text_utils import get_pieces

# Matches an optionally signed number: either a bare fraction ('.5') or digits
# with optional ',ddd' thousands groups and an optional decimal part
# ('1,000.25'). A sign is only consumed when it directly follows the start of
# the string or a whitespace character (which then is part of the match).
_NUMBER_PATTERN = re.compile(r'((^|\s)[+-])?((\.\d+)|(\d+(,\d\d\d)*(\.\d*)?))')

# Suffixes stripped from ordinals written with digits ('1st', '2nd', ...).
_ORDINAL_SUFFIXES = ['st', 'nd', 'rd', 'th']

_INF = float('INF')

# Spelled-out cardinals; the list index is the numeric value of the word.
_NUMBER_WORDS = [
    'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight',
    'nine', 'ten', 'eleven', 'twelve'
]

# Spelled-out ordinals; the list index is the numeric value of the word.
# ('fifth' was previously misspelled as 'fith' and could never match.)
_ORDINAL_WORDS = [
    'zeroth', 'first', 'second', 'third', 'fourth', 'fifth', 'sixth',
    'seventh', 'eighth', 'ninth', 'tenth', 'eleventh', 'twelfth'
]

def _parse_number(text):
    """Parse a simple cardinal or ordinal number written with digits.

    At most one trailing ordinal suffix from ``_ORDINAL_SUFFIXES`` is removed
    (``'3rd'`` -> ``'3'``), thousands separators are dropped
    (``'1,000'`` -> ``'1000'``) and the remainder is converted with ``float``.

    Args:
        text: Candidate number string, e.g. ``'0.55'``, ``'1,000'`` or
            ``'2nd'``.

    Returns:
        The parsed value as a ``float``, or ``None`` when ``text`` is not a
        finite number (unparseable text, NaN, or +/- infinity).
    """
    for suffix in _ORDINAL_SUFFIXES:
        if text.endswith(suffix):
            text = text[:-len(suffix)]
            break

    # Drop thousands separators only. The decimal point must survive:
    # removing '.' would turn '0.55' into 55.0 and leave '1,000' unparseable.
    text = text.replace(',', '')
    try:
        value = float(text)
    except ValueError:
        return None

    # float() accepts 'nan', 'inf' and '-inf'; none of them is a usable number.
    if not math.isfinite(value):
        return None
    return value



def _get_span_length_key(span):
    """Build the sort key ``(length, -begin_index)`` for a span.

    ``span`` is a ``(begin_index, end_index)`` pair. When the keys are sorted
    in descending order, longer spans come first and, among spans of equal
    length, the one with the smaller begin index comes first.
    """
    start, stop = span
    length = stop - start
    return (length, -start)

# def parse_text(text):
#     """Extract longest number and date spans."""
#     span_dict = collections.defaultdict(list)
#     for match in _NUMBER_PATTERN.finditer(text):
#         span_text = text[match.start(): match.end()]
#         number = _parse_number(span_text)
#         if number is not None:
#             span_dict[match.span()].append(
#                 normalize_number(number) )
    
#     for begin_index, end_index in text_utils.get_pieces(text, 1):
#         if (begin_index, end_index) in span_dict:
#             continue
#         span_text = text[begin_index: end_index]

#         number = _parse_number(span_text)
#         if number is not None:
#             span_dict[begin_index, end_index].append(
#                 normalize_number(number) )
#         for number, word in enumerate(_NUMBER_WORDS):
#             if span_text == word:
#                 span_dict[begin_index, end_index].append(
#                     normalize_number(float(number)) )
#                 break
#         for number, word in enumerate(_ORDINAL_WORDS):
#             if span_text == word:
#                 span_dict[begin_index, end_index].append(
#                     normalize_number(float(number)) )
#                 break
    
#     spans = sorted(
#         span_dict.items(), 
#         key=lambda span_value: _get_span_length_key(span_value[0]), 
#         reverse=True
#     )
#     selected_spans = []
#     for span, number in spans:
#         for selected_span, _ in selected_spans:
#             if (selected_span[0] <= span[0] and (span[1]) <= selected_span[1]):
#                 break
#             else:
#                 selected_spans.append((span, number))
#     selected_spans.sort(key=lambda span_value: span_value[0][0])

#     return selected_spans
    


# def find_numeric_spans(text):
#     """Find the numerical spans (date, number, etc.) given a text segment."""
#     numeric_spans = parse_text(text)
#     return [span.values for span in numeric_spans]


# Sample sentence containing a percentage and two year numbers. It is not
# referenced by the visible code in this module (presumably kept for manual
# experiments -- confirm before removing).
text = (
    "As a consequence of the Chinese ban on Canadian pork, "
    "Canada had only a 0.55% increase in total Canadian exports "
    "between 2018 and 2019. "
)


def normalize_number(number):
    """Return ``number`` in normalized form.

    Normalization is currently the identity: the argument is handed back
    unchanged.
    """
    normalized = number
    return normalized


@enum.unique
class ValueTypes(enum.Enum):
    """Kinds of value a parsed token can be tagged with.

    The member names suggest integer / float magnitudes, optionally written
    as percentages, plus a marker for "no numeric value" -- the enum is not
    used by the visible code in this module, so confirm against its callers.
    """

    # Plain (non-percent) values.
    INT = 1
    FLOAT = 2

    # Percent-flavoured values.
    PERCENT_INT = 3
    PERCENT_FLOAT = 4

    # No value type.
    NONE = 5


def is_integer(float_value, text=None):
    """Tell whether a numeric value was originally written as an integer.

    Args:
        float_value: The numeric value to inspect.
        text: Optional original text of the number. If it contains a '.',
            the value counts as a float even when it is whole ('32.00').

    Returns:
        True when no decimal point is present in ``text`` (if given) and
        ``float_value`` has no fractional part, False otherwise.

    Raises:
        OverflowError, ValueError: propagated from ``math.ceil`` for
            infinite and NaN values respectively.
    """
    written_with_decimal_point = text is not None and '.' in text
    if written_with_decimal_point:
        return False

    rounded_up = math.ceil(float_value)
    return rounded_up == float_value



def parse_number(word):
    """Parse ``word`` as a number using two increasingly lenient heuristics.

    Level 1 converts the text directly with ``float``. Level 2 is tried only
    when level 1 fails: one leading '(' and one trailing '%' or ')' are
    dropped, thousands separators (',') are removed, and the remainder is
    converted again. Dropping a '%' does not rescale the value ('55%' -> 55)
    and dropping parentheses does not change the sign.

    Args:
        word: Text of the candidate number, e.g. '12', '3.50', '1,000', '55%'.

    Returns:
        An ``int`` when the text denotes a whole number written without a
        decimal point, a ``float`` otherwise, or ``None`` when ``word`` is not
        a string or cannot be read as a finite number.
    """
    if not isinstance(word, str):
        return None

    sanitized = word.strip()
    if not sanitized:
        return None

    # Level 1: direct conversion.
    try:
        float_value = float(sanitized)
    except ValueError:
        float_value = None

    if float_value is not None:
        # float() also accepts 'inf' / 'nan'; those are not usable numbers.
        if not math.isfinite(float_value):
            return None
        if is_integer(float_value, sanitized):
            return int(float_value)
        return float_value

    # Level 2: strip wrapping parentheses / percent sign and thousands commas.
    # This must run *after* level 1 has failed; keeping it inside the level-1
    # ``try`` made it unreachable, so '1,000' and '55%' were never parsed.
    if sanitized.startswith('('):
        sanitized = sanitized[1:]
    if sanitized.endswith(('%', ')')):
        sanitized = sanitized[:-1]
    sanitized = sanitized.replace(',', '')

    try:
        if sanitized.count('.') == 1:  # float: '1,000.7', '32.00%'
            value = float(sanitized)
        else:  # integer: '1,000'
            value = int(sanitized)
    except ValueError:
        return None

    if not math.isfinite(value):
        return None
    return value


# %% A unified Normalized number class


class NormalNumber(object):
    """Store one number reconciled from several source representations.

    Attributes:
        is_percent: True when at least one source is a string ending in '%'.
        float: Unified float value, or None when the sources could not be
            reconciled. For percent numbers the value is stored as a fraction
            (e.g. 0.55 for '55%').
        string: 'xx.x%' for percent numbers, 'xx.xx' otherwise; None whenever
            ``float`` is None.
    """

    # Largest spread allowed between the normalized sources for them to be
    # considered the same number.
    _AGREEMENT_TOLERANCE = 0.02

    def __init__(self, sources, calc_index=-1):
        """Build the unified representation.

        Args:
            sources: Iterable of str / numeric representations of one number.
            calc_index: Index (into the successfully normalized sources) of
                the value to keep; -1 selects the last one.
        """
        # Materialize once: the sources are traversed twice below, which
        # would silently exhaust a generator.
        sources = list(sources)
        self.is_percent = self.has_percent_source(sources)
        self.float = self.get_float(sources, self.is_percent, calc_index)
        self.string = None
        self.set_string()

    def __repr__(self):
        return (
            f'{type(self).__name__}(float={self.float!r}, '
            f'is_percent={self.is_percent!r}, string={self.string!r})'
        )

    @staticmethod
    def has_percent_source(sources):
        """Return True if any source is a string ending in '%'."""
        # str.endswith is safe on empty strings, unlike indexing with [-1].
        return any(
            isinstance(source, str) and source.endswith('%')
            for source in sources
        )

    @staticmethod
    def normalize_to_float(source, standardize, rounding_precision=4):
        """Normalize a str / numeric source to a rounded float.

        Args:
            source: String (thousands commas and a trailing '%' allowed) or a
                value accepted by ``float``.
            standardize: When truthy, non-percent values greater than 1.0 are
                divided by 100 so they are expressed as a fraction.
            rounding_precision: Number of decimals kept.

        Returns:
            The normalized float, or None when the source cannot be converted,
            is not finite, or is a percent string outside [0%, 100%].
        """
        if isinstance(source, str):
            cleaned = source.replace(',', '')
            if cleaned.endswith('%'):
                try:
                    fraction = round(
                        float(cleaned[:-1]) / 100.0, rounding_precision)
                except ValueError:
                    return None
                # Explicit range check instead of ``assert`` so the result
                # does not change when Python runs with -O. NaN fails it too.
                if 0 <= fraction <= 1:
                    return fraction
                return None
            raw = cleaned
        else:
            raw = source

        try:
            fnum = round(float(raw), rounding_precision)
        except (TypeError, ValueError, OverflowError):
            return None

        # NaN / infinity would defeat the agreement check in get_float.
        if not math.isfinite(fnum):
            return None

        if standardize and fnum > 1.0:
            fnum = fnum / 100.0
        return round(fnum, rounding_precision)

    def get_float(self, sources, standardize, calc_index=-1):
        """Get a unified float from arbitrary types of sources.

        At least two sources must normalize successfully and agree within
        ``_AGREEMENT_TOLERANCE``; otherwise None is returned.

        Args:
            sources: Iterable of str / numeric representations.
            standardize: Forwarded to ``normalize_to_float``.
            calc_index: Index of the value to return; -1 (the last one) is the
                default.

        Returns:
            The selected float, or None when the sources do not agree.
        """
        floats = [self.normalize_to_float(s, standardize) for s in sources]
        floats = [f for f in floats if f is not None]
        if len(floats) > 1 and \
                (max(floats) - min(floats)) < self._AGREEMENT_TOLERANCE:
            # NOTE(review): calc_index addresses the list *after* unparseable
            # sources were dropped, so a non-negative index may point at a
            # different source than in the input (or raise IndexError).
            return floats[calc_index]
        return None

    def set_string(self):
        """Set ``self.string`` from the parsed float and the percent flag."""
        if self.float is None:
            self.string = None
            return

        if self.is_percent:
            percent_float = round(self.float * 100, 2)
            self.string = f'{percent_float:.1f}%'
        else:
            self.string = f'{self.float:.2f}'

    @classmethod
    def create_number(cls, sources, calc_index=-1):
        """Create a NormalNumber from multiple sources if they can be unified.

        Returns:
            The new instance, or None when no unified float could be derived.
        """
        candidate = cls(sources, calc_index)
        if candidate.float is None:
            return None
        return candidate

