import ast
import logging
import subprocess
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import *


@dataclass
class EvaultationItem(ABC):
    """
    Base record for a single benchmark sample.

    Bundles the predicted code, the optional reference code and the test
    cases. Concrete subclasses decide how these pieces are assembled into a
    test file.
    """

    # Predicted code to be evaluated.
    generated_code: str
    # Original (reference) code; may be None.
    ground_truth_code: Optional[str]
    # Test cases, one string per entry.
    test_list: List[str]

    @abstractmethod
    def generate_test_file(self) -> str:
        """Build the test file for this item; must be provided by subclasses."""


@dataclass
class PythonEvaluationItem(EvaultationItem):
    """
    Benchmark item whose test file is a Python program.
    """

    def generate_test_file(self) -> str:
        """
        Return the generated code followed by the test cases.

        The test cases are placed one per line and separated from the
        generated code by an empty line.
        """
        joined_tests = "\n".join(self.test_list)
        return "{}\n\n{}".format(self.generated_code, joined_tests)


class Evaluator(ABC):
    """
    General Evaluation Wrapper which computes exact, syntax and execution match.

    Each of the three checks can be switched off independently through the
    class-level flags below (override them on a subclass or set them on an
    instance). All checks are enabled by default.
    """

    # Per-check switches read by ``evaluate``. The syntax/execution switches
    # cannot be named ``check_syntax`` / ``check_execution``: those names belong
    # to the abstract methods below, and a method defined later in the class
    # body replaces a same-named attribute, which would leave the flag unusable.
    check_exact: bool = True
    run_syntax_check: bool = True
    run_execution_check: bool = True

    def evaluate(self, eval_item: "EvaultationItem") -> Dict[str, bool]:
        """
        Run every enabled check on ``eval_item``.

        Args:
            eval_item: The benchmark item to evaluate.

        Returns:
            A dict holding one entry per enabled check, keyed ``"exact"``,
            ``"syntax"`` and ``"execution"``. Disabled checks are omitted.
        """
        eval_dict: Dict[str, bool] = {}
        if self.check_exact:
            eval_dict["exact"] = self.check_exact_match(eval_item)
        if self.run_syntax_check:
            eval_dict["syntax"] = self.check_syntax(eval_item)
        if self.run_execution_check:
            eval_dict["execution"] = self.check_execution(eval_item)
        return eval_dict

    @abstractmethod
    def check_exact_match(self, eval_item: "EvaultationItem") -> bool:
        """Return the verdict of the exact-match check for ``eval_item``."""

    @abstractmethod
    def check_syntax(self, eval_item: "EvaultationItem") -> bool:
        """Return the verdict of the syntax check for ``eval_item``."""

    @abstractmethod
    def check_execution(self, eval_item: "EvaultationItem") -> bool:
        """Return the verdict of the execution check for ``eval_item``."""

    @abstractmethod
    def execute(self, code: str) -> bool:
        """Execute ``code`` and return whether the run succeeded."""


class PythonEvaluator(Evaluator):
    """
    Evaluator for Python code.

    Exact match is plain equality of the two code strings, syntax is checked
    with ``ast.parse`` and execution runs the generated test file in a
    separate interpreter process.
    """

    # Time limit, in seconds, for one executed test file.
    execution_timeout: float = 50

    def check_exact_match(self, eval_item: EvaultationItem) -> bool:
        """Return True when the generated code equals the ground-truth code."""
        return eval_item.generated_code == eval_item.ground_truth_code

    def check_syntax(self, eval_item: EvaultationItem) -> bool:
        """Return True when the generated code parses as Python source."""
        try:
            ast.parse(eval_item.generated_code)
        except (SyntaxError, ValueError):
            # ValueError is what older interpreters raise for source text
            # containing null bytes; that is a syntax failure too, not a crash.
            return False
        return True

    def check_execution(self, eval_item: EvaultationItem) -> bool:
        """Return True when the item's generated test file runs successfully."""
        return self.execute(eval_item.generate_test_file())

    def execute(self, code: str) -> bool:
        """
        Run ``code`` in a child Python process.

        Args:
            code: Complete program text, passed to the interpreter via ``-c``.

        Returns:
            True when the process exits with return code 0. False on a
            non-zero return code, on timeout, or when the process could not
            be run at all (the reason is logged).
        """
        # Prefer the interpreter running this module so the child sees the
        # same Python; fall back to PATH lookup if it is unknown.
        interpreter = sys.executable or "python"
        try:
            # Only the return code is used, so the child's output is discarded
            # instead of being captured and decoded (decoding could itself
            # fail on arbitrary bytes and mask a successful run).
            result = subprocess.run(
                [interpreter, "-c", code],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=self.execution_timeout,
            )
        except subprocess.TimeoutExpired:
            logging.error("Timeout error occurred")
            return False
        except Exception:
            # Best effort: e.g. interpreter not found or un-passable argument.
            # Report the real cause rather than mislabelling it as a timeout.
            logging.exception("Failed to execute code")
            return False
        return result.returncode == 0

def executing_code(code, test_list):
    """
    Run ``code`` against one or several test suites and report which passed.

    Args:
        code: Python source under test.
        test_list: Either a list (or tuple) of test statements, treated as a
            single suite named ``"original"``, or a dict mapping suite names
            to lists of test statements.

    Returns:
        A dict with two keys: ``"execution"`` is True when at least one suite
        passed, and ``"solved_test_list"`` holds the names of the suites that
        passed, in iteration order. An empty dict of suites yields
        ``{"execution": False, "solved_test_list": []}``.

    Raises:
        TypeError: If ``test_list`` is neither a list/tuple nor a dict.
    """
    if isinstance(test_list, dict):
        suites = test_list
    elif isinstance(test_list, (list, tuple)):
        suites = {"original": test_list}
    else:
        raise TypeError(
            "test_list must be a list of tests or a dict of named test lists, "
            f"got {type(test_list).__name__}"
        )

    # Only the execution verdict is reported, so the execution check is called
    # directly; the ground-truth argument is unused and left empty.
    solved = [
        name
        for name, tests in suites.items()
        if PythonEvaluator().check_execution(PythonEvaluationItem(code, "", tests))
    ]
    return {"execution": bool(solved), "solved_test_list": solved}


def main():
    """
    Demo: run a sample ``is_not_prime`` implementation through
    ``executing_code``, first with a plain list of tests and then with a dict
    of named test suites, printing each result.
    """
    # No ground-truth code is defined here: executing_code builds its items
    # with an empty ground-truth string.
    candidate = (
        "def is_not_prime(num):\n"
        "    if num > 1:\n"
        "        for i in range(2, num):\n"
        "            if num % i == 0:\n"
        "                return True\n"
        "        return False\n"
        "    else:\n"
        "        return True"
    )

    original_tests = [
        "assert is_not_prime(2) == False",
        "assert is_not_prime(10) == True",
        "assert is_not_prime(35) == True",
    ]
    print(executing_code(candidate, original_tests))

    # Same tests again, plus a variant suite that passes the numbers as strings.
    suites = {
        "original": list(original_tests),
        "ip_resp-NumToStr--original": [
            "assert is_not_prime('2') == False",
            "assert is_not_prime('10') == True",
            "assert is_not_prime('35') == True",
        ],
    }
    print(executing_code(candidate, suites))


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
