import sexpdata


def _clean_value(value):
    '''
    Clean utterances
    '''
    # Characters to replace
    from_to = [('“', '"'), ('”', '"'), ('’', '\''),
               ('‘', '`'), ('–', '-'), ('- ', '-')]
    for from_, to in from_to:
        value = value.replace(from_, to)

    return value.lower()


class CFGNode:
    '''
    Node of a CFG parse tree annotated with a span over a sentence.

    Instance fields:
      value    -- label or token text after _clean_value (lower-cased)
      childs   -- list of child nodes; empty for a leaf
      parent   -- parent node, or None for the root
      beg, end -- span offsets, 0 until compute_span fills them in
      index    -- position inside the parent's ``childs`` (set by _from_sexp)
    '''

    def __init__(self, value, childs, parent=None):
        self.value = _clean_value(value)
        self.childs = childs
        self.parent = parent
        self.beg = 0
        self.end = 0
        self.index = 0

    def __str__(self):
        '''Render the whole subtree: value, span and every child.'''
        return '("{}"<{}:{}> {})'.format(self.value,
                                         self.beg, self.end,
                                         ' '.join(str(c) for c in self.childs))

    def __repr__(self):
        '''Render this node only: value, span and number of children.'''
        return '("{}"<{}:{}> {} childs)'.format(self.value,
                                                self.beg, self.end,
                                                len(self.childs))

    def first_cross(self):
        '''
        Climb through ancestors that have exactly one child.

        Returns a pair ``(p, n)``: ``p`` is the first ancestor whose number
        of children is not 1 (None when the climb runs past the root) and
        ``n`` is the node on the path directly below ``p`` (``self`` when
        no climbing happened).
        '''
        p = self.parent
        n = self
        while p and len(p.childs) == 1:
            n = p
            p = p.parent
        return p, n

    def get_span_cfg_nodes(self, beg, end):
        '''
        Collect the nodes of this subtree that cover the span beg..end.

        * A node whose own span lies inside beg..end is returned as is.
        * A node whose span contains beg..end and which is a leaf, or a
          chain of single-child nodes ending in a leaf, is returned as is.
        * Otherwise the result is the concatenation of the results of the
          children that overlap the span, in order.
        '''
        if self.beg >= beg and self.end <= end:
            return [self]
        index, n = 0, len(self.childs)
        # The span falls inside a single leaf (e.g. the "'s" part of a
        # token): follow single-child links down and, if they end in a
        # leaf, answer with this node.
        if self.beg <= beg and self.end >= end:
            m = n
            node = self
            while m == 1:
                node = node.childs[0]
                m = len(node.childs)
            if m == 0:
                return [self]
        nodes = []
        # Skip the children that end before the span starts ...
        while index < n and self.childs[index].end < beg:
            index += 1
        # ... then recurse into every child that starts before it ends.
        while index < n and self.childs[index].beg < end:
            child = self.childs[index]
            nodes.extend(child.get_span_cfg_nodes(beg, end))
            index += 1
        return nodes

    @staticmethod
    def common_ancient(n1, n2):
        '''
        Return the lowest common ancestor of two nodes.

        A node counts as its own ancestor, so when one argument is an
        ancestor of the other that argument is returned. Returns None when
        the nodes share no ancestor.

        When both arguments are the same node its parent is returned
        (None for a root); this matches what the method has always done
        for that case.

        The previous implementation moved both nodes up in lockstep and
        therefore returned None whenever the answer could only be reached
        with a depth difference of two or more (e.g. a root and its
        grandchild).
        '''
        if n1 is n2:
            return n1.parent

        # Remember every node on the path from n1 up to its root.
        # Identity is used because CFGNode defines no __eq__/__hash__.
        seen = set()
        node = n1
        while node is not None:
            seen.add(id(node))
            node = node.parent

        # The first node on n2's upward path that is also on n1's path
        # is the lowest common ancestor.
        node = n2
        while node is not None:
            if id(node) in seen:
                return node
            node = node.parent
        return None

    @staticmethod
    def compute_span(cfg_root, cfg_leaves, sentence):
        '''
        Fill in ``beg``/``end`` for every node of the tree, in place.

        Leaves are visited in the order given by ``cfg_leaves`` and each
        leaf's value is searched for in ``sentence`` starting at a running
        offset. Leaves that cannot be found get a span derived from their
        neighbours. Inner nodes then take ``beg`` from their first child
        and ``end`` from their last child.
        '''
        beg = 0
        last_node = None
        # Compute span for every leaf node
        for node in cfg_leaves:
            pos = sentence.find(node.value, beg)
            if pos != -1:  # The token is found in sentence
                node.beg = pos
                beg = node.end = pos + len(node.value)
                # The previous leaf still has an empty span: close it
                # just before this token.
                if last_node and last_node.end == last_node.beg:
                    last_node.end = pos - 1
            else:  # Leave the end of the span to be set later
                # Start right after the previous leaf when that leaf has
                # a non-empty span.
                if last_node and last_node.end != last_node.beg:
                    node.end = node.beg = last_node.end + 1
                # Advance the search offset by the token length anyway.
                beg += len(node.value)
            last_node = node

        # A trailing leaf with an empty span extends to the sentence end.
        if last_node and last_node.end == last_node.beg:
            last_node.end = len(sentence)

        # Compute span for every inner node, children first
        def fill_inner(node):
            if node.childs:
                for child in node.childs:
                    fill_inner(child)
                node.beg = node.childs[0].beg
                node.end = node.childs[-1].end

        fill_inner(cfg_root)

    @staticmethod
    def from_sexp(string):
        '''
        Parse an s-expression string with ``sexpdata.loads`` and build the
        tree from it. Returns the pair ``(root, leaves)`` produced by
        _from_sexp.
        '''
        return CFGNode._from_sexp(sexpdata.loads(string))

    @staticmethod
    def _from_sexp(sexp, leaves=None, parent=None):
        '''
        Create a CFG tree from a parsed sexp.

        Returns ``(node, leaves)`` where ``node`` is the node built for
        ``sexp`` and ``leaves`` is the shared list of leaf nodes in the
        order they were created. A list becomes an inner node labelled
        with ``sexp[0].value()``; a ``str`` becomes a leaf. For any other
        kind of ``sexp`` the returned node is None.
        '''
        if leaves is None:
            leaves = []
        node = None
        # The node is an inner node
        if isinstance(sexp, list):
            value = sexp[0].value()
            # If the node has only 1 child and its label is the same
            # as its child's, replace this node with its child
            while len(sexp) == 2 and \
                    isinstance(sexp[1], list) and \
                    sexp[1][0].value() == value:
                sexp = sexp[1]
            node = CFGNode(value, [], parent)
            node.childs = [CFGNode._from_sexp(subexp, leaves, node)[0]
                           for subexp in sexp[1:]]
            # Record each child's position among its siblings
            for index, child in enumerate(node.childs):
                child.index = index
        # The node is a leaf node
        elif isinstance(sexp, str):
            node = CFGNode(sexp, [], parent)
            leaves.append(node)
        return node, leaves
