import re
import json
import tiktoken
import config


def get_table_input(prompt):
    """Extract the table text that follows ``Input: `` and split it into rows.

    The end delimiter depends on ``config.FMT3_TAG``: when it is truthy the
    table is taken up to ``\\nFormat_Dict: ``, otherwise up to the
    ``\\n\\n###\\n\\n`` separator.

    Args:
        prompt: Full prompt text containing the ``Input: `` section.

    Returns:
        A list of row strings. A new row starts wherever a newline is
        followed by ``|`` plus the table's second character (presumably the
        first column letter -- confirm against the prompt format).

    Raises:
        IndexError: If the section is not found or the table has fewer than
            two characters.
    """
    pattern = (
        r'\nInput: (.*?)\nFormat_Dict: '
        if config.FMT3_TAG
        else r'(?<=Input: ).+?(?=\n\n###\n\n)'
    )

    table = re.findall(pattern, prompt, re.DOTALL)[0].strip()

    # Every row is expected to open with the same two characters as the table.
    row_marker = '|' + table[1]
    head, *tail = table.split('\n' + row_marker)

    # split() consumed the marker, so put it back on every row after the first.
    return [head] + [row_marker + piece for piece in tail]

def get_table_cell_input(prompt):
    """Extract the ``Cell_Input`` table from *prompt* and split it into rows.

    The table is the text between ``\\nCell_Input: `` and ``\\nNFS_Input:``.

    Args:
        prompt: Full prompt text containing both section headers.

    Returns:
        A list of row strings. A new row starts wherever a newline is
        followed by ``|`` plus the table's second character.

    Raises:
        IndexError: If the section is not found or the table has fewer than
            two characters.
    """
    found = re.findall(r'\nCell_Input: (.*?)\nNFS_Input:', prompt, re.DOTALL)
    table = found[0].strip()

    row_marker = '|' + table[1]
    pieces = table.split('\n' + row_marker)

    # The first piece still carries its marker; the rest lost it to split().
    rows = pieces[:1]
    for piece in pieces[1:]:
        rows.append(row_marker + piece)
    return rows

def get_table_nfs_input(prompt):
    """Extract the ``NFS_Input`` table from *prompt* and split it into rows.

    The section that terminates the table depends on the config flags:

    * ``FMT3_TAG`` and ``FMT1_TAG`` both truthy: ``\\nFormat_Input: ``
    * only ``FMT3_TAG`` truthy: ``\\nFormat_Dict: ``
    * otherwise: the ``\\n\\n###\\n\\n`` separator

    Args:
        prompt: Full prompt text containing the ``NFS_Input: `` section.

    Returns:
        A list of row strings. A new row starts wherever a newline is
        followed by ``|`` plus the table's second character.

    Raises:
        IndexError: If the section is not found or the table has fewer than
            two characters.
    """
    if config.FMT3_TAG:
        if config.FMT1_TAG:
            input_pattern = r'\nNFS_Input: (.*?)\nFormat_Input: '
        else:
            input_pattern = r'\nNFS_Input: (.*?)\nFormat_Dict: '
    else:
        input_pattern = r'\nNFS_Input: (.*?)\n\n###\n\n'

    table = re.findall(input_pattern, prompt, re.DOTALL)[0].strip()

    row_marker = '|' + table[1]
    head, *tail = table.split('\n' + row_marker)

    # split() consumed the marker, so restore it on every row after the first.
    return [head, *(row_marker + piece for piece in tail)]

def get_table_fmt_input(prompt):
    """Return the text of the ``Format_Dict`` section of *prompt*.

    The section runs from ``\\nFormat_Dict: `` up to the first
    ``\\n\\n###\\n\\n`` separator; surrounding whitespace is stripped.

    Args:
        prompt: Full prompt text containing the ``Format_Dict: `` section.

    Returns:
        The stripped section text as a single string.

    Raises:
        IndexError: If the section is not found.
    """
    sections = re.findall(
        r'\nFormat_Dict: (.*?)\n\n###\n\n', prompt, re.DOTALL
    )
    # Only the first occurrence is used.
    return sections[0].strip()

def parse_excel_row(row):
    """Split one row string into its ``ADDRESS,value`` cell strings.

    A cell starts at every ``|`` that is followed by uppercase letters,
    digits and a comma (for example ``|AB12,``).

    Args:
        row: Row text such as ``"|A1,foo|B1,bar"``.

    Returns:
        A list of cell strings without the leading ``|``; empty when the row
        contains no cell marker.
    """
    starts = [m.start() for m in re.finditer(r'\|[A-Z]+\d+,', row)]

    # Each cell runs up to the next marker; the last one runs to the row end.
    stops = starts[1:] + [len(row)]

    # "+ 1" skips the leading '|' of the cell.
    return [row[begin + 1:stop] for begin, stop in zip(starts, stops)]


def excel_address_to_coords(address):
    """Convert an Excel-style address such as ``"B3"`` to zero-based coords.

    Alphabetic characters form the column part; every other character is
    treated as part of the row number.

    Args:
        address: Cell address, e.g. ``"A1"`` or ``"AA10"``. Letters are
            upper-cased before conversion.

    Returns:
        A ``(row_index, column_index)`` tuple, both zero-based.

    Raises:
        ValueError: If the non-alphabetic part is not a valid integer.
    """
    letters = ''.join(ch for ch in address if ch.isalpha())
    digits = ''.join(ch for ch in address if not ch.isalpha())

    row_index = int(digits) - 1

    # Bijective base-26: 'A' -> 1 ... 'Z' -> 26, accumulated left to right.
    column_number = 0
    for ch in letters:
        column_number = column_number * 26 + (ord(ch.upper()) - 64)

    return (row_index, column_number - 1)


def col_num_to_letter(col_num):
    """Convert a one-based column number to its Excel column letters.

    Args:
        col_num: One-based column number (1 -> ``"A"``, 27 -> ``"AA"``).

    Returns:
        The column letters, or an empty string when ``col_num`` is not
        positive.
    """
    letters = []
    remaining = col_num
    while remaining > 0:
        # Subtracting 1 first maps 1..26 onto 0..25 (there is no zero digit).
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord('A') + offset))

    # Letters were produced least-significant first.
    return ''.join(reversed(letters))

def col_letter_to_num(col_str):
    """Convert Excel column letters to a one-based column number.

    Args:
        col_str: Column letters such as ``"A"`` or ``"AA"``. No case
            conversion is performed; each character is offset from ``'A'``.

    Returns:
        The one-based column number (``"A"`` -> 1, ``"AA"`` -> 27), or 0 for
        an empty string.
    """
    base = ord('A') - 1
    # The rightmost letter is the least-significant bijective base-26 digit.
    return sum(
        (ord(ch) - base) * 26 ** power
        for power, ch in enumerate(reversed(col_str))
    )

def tuple_to_excel_cell(coord):
    """Convert zero-based ``(row, col)`` coordinates to an Excel address.

    Args:
        coord: Two-item sequence ``(row, col)``, both zero-based.

    Returns:
        The address string, e.g. ``(2, 1)`` -> ``"B3"``. A negative column
        yields no column letters.
    """
    row, col = coord

    letters = []
    while col >= 0:
        quotient, offset = divmod(col, 26)
        letters.append(chr(65 + offset))
        # "- 1" accounts for the column system having no zero digit.
        col = quotient - 1

    # Letters were produced least-significant first.
    return ''.join(reversed(letters)) + str(row + 1)


def parse_excel_row_value(row):
    """Return the value part of every ``ADDRESS,value`` cell in *row*.

    Cells are located the same way as in ``parse_excel_row``: each one starts
    at a ``|`` followed by uppercase letters, digits and a comma.

    Args:
        row: Row text such as ``"|A1,foo|B1,1,234"``.

    Returns:
        A list with one stripped value per cell. Values made up solely of
        digits and commas have the commas removed (``"1,234"`` -> ``"1234"``).
    """
    starts = [m.start() for m in re.finditer(r'\|[A-Z]+\d+,', row)]
    stops = starts[1:] + [len(row)]

    values = []
    for begin, stop in zip(starts, stops):
        cell = row[begin + 1:stop]  # drop the leading '|'
        # Everything after the first comma is the value; the address is dropped.
        value = cell.split(",", 1)[1].strip()
        has_other_chars = any(
            not ch.isdigit() and ch != ',' for ch in value
        )
        if not has_other_chars:
            # Digits-and-commas only: remove the thousands separators.
            value = value.replace(',', '')
        values.append(value)
    return values

def cal_tokens(input_file_path):
    """Count ``cl100k_base`` tokens in the first three messages of each record.

    The file is read line by line; every line is parsed as a JSON object
    whose ``"messages"`` list must hold at least three entries, each with a
    ``"content"`` string. Any further messages are not counted.

    Args:
        input_file_path: Path to the UTF-8 encoded JSON Lines file.

    Returns:
        The total token count over all lines.
    """
    encoder = tiktoken.get_encoding("cl100k_base")
    total = 0
    with open(input_file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            messages = json.loads(raw_line)["messages"]
            total += sum(
                len(encoder.encode(messages[idx]["content"]))
                for idx in range(3)
            )

    return total

def rows_to_new_input(txt_rows, nfs_rows = None, nfs_tag = False):
    """Build the input section of a prompt from table rows.

    Args:
        txt_rows: Row strings for the main table; joined with newlines.
        nfs_rows: Row strings for the ``NFS_Input`` table. Required when
            ``nfs_tag`` is True, ignored otherwise.
        nfs_tag: When True, emit ``Cell_Input`` and ``NFS_Input`` sections;
            when False, emit a single ``Input`` section.

    Returns:
        The section text, always terminated by ``\\n\\n###\\n\\n``.

    Raises:
        ValueError: If ``nfs_tag`` is True and ``nfs_rows`` is None.
    """
    if not nfs_tag:
        # This branch previously built the string but never returned it,
        # so callers received None.
        return "\nInput: " + "\n".join(txt_rows) + "\n\n###\n\n"

    if nfs_rows is None:
        raise ValueError("nfs_rows must be provided when nfs_tag is True.")

    return (
        "\nCell_Input: " + "\n".join(txt_rows)
        + "\n\nNFS_Input: " + "\n".join(nfs_rows)
        + "\n\n###\n\n"
    )

def get_new_prompt(prompt, input_change):
    """Rebuild a prompt, keeping its instruction text and replacing the input.

    The instruction is the text between the first ``"Instruction :"`` and
    the first ``"Input:"`` that follows it, stripped of surrounding
    whitespace.

    Args:
        prompt: Original prompt text.
        input_change: New input section to append after the instruction.

    Returns:
        ``"Instruction :"`` + the stripped instruction text + ``input_change``.

    Raises:
        ValueError: If ``"Instruction :"`` is missing, or no ``"Input:"``
            follows it. Previously this surfaced as an UnboundLocalError.
    """
    marker = "Instruction :"
    marker_pos = prompt.find(marker)
    if marker_pos == -1:
        raise ValueError("prompt does not contain 'Instruction :'.")

    body_start = marker_pos + len(marker)
    # Search only after the instruction marker so that an earlier "Input:"
    # cannot produce an empty slice.
    # NOTE(review): "Input:" also matches inside "Cell_Input:", which leaves
    # a trailing "Cell_" in the instruction text -- confirm this is intended
    # for prompts that use the Cell_Input/NFS_Input layout.
    input_pos = prompt.find("Input:", body_start)
    if input_pos == -1:
        raise ValueError(
            "prompt does not contain 'Input:' after 'Instruction :'."
        )

    extracted_text = prompt[body_start:input_pos].strip()
    return marker + extracted_text + input_change
