#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from typing import List, Tuple, Pattern


def iter_indicated_text_areas(raw_text: str,
                              indicator: str,
                              indicator_context_range: Tuple[int, int], ):
    """Yield ``(start, end)`` offsets of the text window around each indicator occurrence.

    ``indicator`` is searched in ``raw_text`` as a regular expression, ignoring case.
    For every occurrence, a window is opened that reaches ``left`` characters before
    the occurrence and ``right`` characters (plus one) after it, where
    ``(left, right)`` is ``indicator_context_range``.  The window never starts before
    the end of the previous occurrence (or the start of the text) and never extends
    past the start of the next occurrence (or the end of the text).

    Args:
        raw_text: Text to scan.
        indicator: Pattern marking places of interest; matched case-insensitively.
        indicator_context_range: ``(left, right)`` context sizes in characters.

    Yields:
        ``(area_start_offset, area_end_offset)`` pairs usable as a slice of ``raw_text``.
    """
    left_size, right_size = indicator_context_range
    occurrences = [found.span() for found in re.finditer(indicator, raw_text, flags=re.IGNORECASE)]
    last_idx = len(occurrences) - 1
    text_length = len(raw_text)

    for idx, (occ_start, occ_end) in enumerate(occurrences):
        # The neighbouring occurrences (or the text boundaries) cap the window on both sides.
        lower_bound = occurrences[idx - 1][1] if idx > 0 else 0
        upper_bound = occurrences[idx + 1][0] if idx < last_idx else text_length

        yield (max(lower_bound, occ_start - left_size),
               min(upper_bound, occ_end + right_size + 1))


def extract_spans_with_context(raw_text: str,
                               regex: Pattern,
                               indicator: str = None,
                               indicator_context_range: Tuple[int, int] = None,
                               ) -> List[Tuple[Tuple[int, int], str, str]]:
    """Collect the first (and, if defined, second) capture group of every ``regex`` match.

    When ``indicator`` is given, ``regex`` is only run on the text areas produced by
    ``iter_indicated_text_areas`` instead of on the whole text, so that the search
    does not rely solely on a potentially expensive regex scan.  Without an
    indicator the whole of ``raw_text`` is searched as a single area.

    Args:
        raw_text: Text to search.
        regex: Compiled pattern with at least one capture group.
        indicator: Optional pattern used to narrow down the areas to search.
        indicator_context_range: ``(left, right)`` context sizes forwarded to
            ``iter_indicated_text_areas``; only used when ``indicator`` is given.

    Returns:
        A list of ``((start, end), group_text, context)`` tuples, where the offsets
        are relative to ``raw_text`` and ``context`` is the text of the whole match.
        Capture groups that did not participate in a match are skipped.

    Raises:
        IndexError: If ``regex`` defines no capture group and a match is found.
    """
    if indicator:
        text_areas = iter_indicated_text_areas(raw_text, indicator, indicator_context_range)
    else:
        # If no indicator is specified, just use a single whole area
        text_areas = [(0, len(raw_text))]

    # Group 1 is always requested (a pattern without groups keeps raising IndexError);
    # group 2 is only requested when the pattern actually defines it.
    group_indices = (1, 2) if regex.groups >= 2 else (1,)

    spans_with_context = []

    for area_start_offset, area_end_offset in text_areas:
        potential_text_area = raw_text[area_start_offset:area_end_offset]

        # Examine matches obtained from each potential text area
        for span_match in regex.finditer(potential_text_area):
            context = span_match.group(0)

            for group_idx in group_indices:
                group_start, group_end = span_match.span(group_idx)

                # An optional group that did not participate reports (-1, -1) and a
                # text of None; recording it would produce a bogus offset and text.
                if group_start < 0:
                    continue

                # Match offsets are relative to the area, so shift them back to raw_text.
                span_offset = group_start + area_start_offset, group_end + area_start_offset
                spans_with_context.append((span_offset, span_match.group(group_idx), context))

    return spans_with_context


def check_overlap_with_spans(start: int,
                             end: int,
                             spans: List[Tuple[Tuple[int, int], str]]):
    """Return the first span whose offset range overlaps ``[start, end)``.

    Both the queried range and every span's range are treated as half-open
    (``end`` excluded), so ranges that merely touch do not overlap and an empty
    range overlaps nothing.

    Args:
        start: Start offset of the range to test (inclusive).
        end: End offset of the range to test (exclusive).
        spans: Candidate spans; only the first element of each span, its
            ``(start_offset, end_offset)`` pair, is inspected.

    Returns:
        The first overlapping span from ``spans``, returned as-is, or ``None``
        if no span overlaps.
    """
    for span in spans:
        start_offset, end_offset = span[0]

        # Two half-open ranges share a position exactly when the later start lies
        # before the earlier end; this avoids materialising every offset in a set.
        if max(start, start_offset) < min(end, end_offset):
            return span

    return None


def remove_overlapping_spans_from_back(spans: List[Tuple[Tuple[int, int], str, str]]):
    """Drop every span that overlaps a span located earlier in the list.

    The list order is treated as priority order: earlier spans have higher
    priority.  A span is compared against *all* earlier spans, including ones
    that end up being dropped themselves.  For every surviving span, a trailing
    ``)`` is stripped from its text when the text contains no ``(``.

    Args:
        spans: ``((start, end), span_text, extra)`` tuples in priority order.

    Returns:
        A new list with the surviving spans in their original relative order.
    """
    # Project once to the (offsets, text) shape expected by check_overlap_with_spans.
    offset_text_pairs = [(offsets, text) for offsets, text, _ in spans]

    valid_spans = []

    # Check spans one by one from the back (= lowest priority first) and drop any
    # whose range overlaps a span in front of it (= higher priority).
    for this_i in range(len(spans) - 1, -1, -1):
        (this_start_offset, this_end_offset), span_text, extra = spans[this_i]

        # The first span has nothing in front of it, so it always survives.
        if this_i > 0 and check_overlap_with_spans(this_start_offset,
                                                   this_end_offset,
                                                   offset_text_pairs[:this_i]) is not None:
            continue

        # endswith() is safe on an empty text, unlike indexing with [-1].
        if span_text.endswith(')') and '(' not in span_text:
            # NOTE(review): the offsets are left untouched, so they still cover the
            # stripped ')' -- confirm whether callers expect the end offset to shrink.
            span_text = span_text[:-1]

        valid_spans.append(((this_start_offset, this_end_offset), span_text, extra))

    # Spans were collected back to front; restore the original order.
    valid_spans.reverse()
    return valid_spans
