import json
import requests
import datetime
from utils import set_logger
from tqdm.auto import tqdm
from pathlib import Path
import os

def _fetch_revision_content(session, url, title, rvstart):
    """Return the wikitext of one revision of *title* selected by *rvstart*.

    Sends a MediaWiki ``action=query&prop=revisions`` request that walks
    revisions from *rvstart* towards older ones (``rvdir=older``) and asks
    for a single result (``rvlimit=1``).

    Args:
        session: ``requests.Session`` used to issue the request.
        url: MediaWiki API endpoint.
        title: Page title to look up.
        rvstart: Timestamp string from which the revision walk starts.

    Returns:
        The content of the ``main`` slot of the first revision returned.

    Raises:
        requests.RequestException: On network failure, timeout, or HTTP error.
        ValueError: If the response body is not valid JSON.
        LookupError, TypeError: If the response lacks the expected structure
            (for example a missing page or no revision in the range).
    """
    params = {
        "action": "query",
        "prop": "revisions",
        "titles": title,
        "rvlimit": "1",
        "rvprop": "timestamp|user|comment|content",
        "rvdir": "older",  # walk backwards in time from rvstart
        "rvstart": rvstart,
        "rvend": "2000-05-01T00:00:00Z",  # lower bound of the walk
        "rvslots": "main",
        "formatversion": "2",
        "format": "json",
    }
    # A timeout keeps one stalled connection from hanging the whole run.
    response = session.get(url=url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()
    return data['query']['pages'][0]['revisions'][0]['slots']['main']['content']


def main():
    """Download a Wikipedia revision for every (entity, mention, timestamp).

    Reads ``./timestamp_mention.json``, a mapping whose keys end in an entity
    name (the text after the last ``/``) and whose values are sequences of
    items with the mention at index 0 and a timestamp at index 1. The
    timestamp is interpreted as POSIX seconds in UTC. For each item the
    revision content is fetched and written to
    ``./wikipedia/{entity}###{mention}.mediawiki``; items whose file already
    exists are skipped. Per-item fetch or write failures are logged as
    warnings and do not stop the run.
    """
    logger = set_logger()
    URL = "https://en.wikipedia.org/w/api.php"

    with open("./timestamp_mention.json", "r", encoding="utf-8") as f:
        js = json.load(f)

    # Without the output directory every single write would fail.
    Path('./wikipedia').mkdir(parents=True, exist_ok=True)

    with requests.Session() as S:
        for key, value in tqdm(js.items(), desc="total"):
            entity = key.split('/')[-1]

            for item in tqdm(value, desc=f"entity: {entity}", leave=False):
                mention = item[0]
                timestamp = item[1]
                # Timezone-aware conversion; utcfromtimestamp() is deprecated
                # and yields a naive datetime.
                dt_object = datetime.datetime.fromtimestamp(
                    timestamp, tz=datetime.timezone.utc
                )
                # Send an explicit ISO 8601 string instead of relying on
                # str(datetime), which may include fractional seconds.
                rvstart = dt_object.strftime("%Y-%m-%dT%H:%M:%SZ")

                path = Path(f'./wikipedia/{entity}###{mention}.mediawiki')

                # REMOVE:START
                if path.exists():
                    continue
                # REMOVE:END

                try:
                    context = _fetch_revision_content(S, URL, entity, rvstart)
                    # Wikitext routinely contains non-ASCII characters, so do
                    # not depend on the platform's default encoding.
                    with open(path, 'w', encoding="utf-8") as out:
                        out.write(context)
                except (
                    requests.RequestException,
                    ValueError,
                    LookupError,
                    TypeError,
                    OSError,
                ) as exc:
                    # Best effort: record the reason and move on to the next item.
                    logger.warning(f"Failed to write wikipedia (entity: {entity}, mention: {mention}, timestamp: {timestamp}): {exc!r}")
                else:
                    logger.info(f"Write wikipedia (entity: {entity}, mention: {mention}, timestamp: {timestamp})")


# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

