"""
Builds a dataset of chess games with random moves. This dataset is used to train a model to predict legal moves in a chess game.
Games are saved in UCI format ("e2e4 d7d5...").
"""

import os
import chess
import random
from tqdm import tqdm
from datasets import load_dataset


def build_dataset(num_games, filename, *, max_moves=150, rng=None):
    """Generate random-move chess games and write them to a text file.

    Every game starts from a fresh ``chess.Board()`` and is played by
    choosing one of the current legal moves at random, until the board
    reports the game as over or ``max_moves`` moves have been recorded.
    Each game becomes one line of space-separated UCI moves
    (e.g. ``"e2e4 d7d5 ..."``). Lines are separated by a newline and no
    newline is written after the last game.

    Args:
        num_games: Number of games to generate.
        filename: Path of the output file. It is opened in write mode, so
            existing content is replaced.
        max_moves: Maximum number of moves recorded per game. Counts
            individual moves by either side, not full-move pairs.
        rng: Optional object with a ``choice(sequence)`` method, such as a
            seeded ``random.Random``, for reproducible output. When omitted,
            the module-level ``random`` generator is used.
    """
    chooser = random if rng is None else rng

    # Games are streamed to disk one at a time so that memory use stays
    # flat regardless of num_games.
    with open(filename, 'w', encoding='utf-8') as f:
        for game_index in tqdm(range(num_games)):
            all_moves = []
            board = chess.Board()
            while not board.is_game_over() and len(all_moves) < max_moves:
                move = chooser.choice(list(board.legal_moves))
                board.push(move)
                all_moves.append(move.uci())

            # Separator goes *before* every game except the first, so the
            # file never ends with a trailing newline.
            if game_index:
                f.write('\n')
            f.write(' '.join(all_moves))


def main():
    """Generate the train, validation and test splits of the dataset.

    Creates the output directory if needed, then writes one ``<split>.txt``
    file per split via ``build_dataset``.
    """
    dataset_dir = './datasets/random_chess'
    os.makedirs(dataset_dir, exist_ok=True)

    # (split name, number of games), processed in this order.
    split_sizes = (
        ('train', 200000),
        ('validation', 1000),
        ('test', 1000),
    )
    for split_name, game_count in split_sizes:
        out_path = os.path.join(dataset_dir, split_name + '.txt')
        build_dataset(game_count, out_path)



def test():
    """Reload the generated text files and print a small sample.

    Loads the dataset directory with the ``text`` loader, prints the
    resulting dataset object, then prints the first five rows of the
    ``train`` split.
    """
    dataset = load_dataset('text', data_dir='./datasets/random_chess')
    print(dataset)

    train_split = dataset['train']
    sample_count = 5
    for row_index in range(sample_count):
        print(train_split[row_index])



# Script entry point: first write all dataset splits to disk, then reload
# them and print a few rows as a quick sanity check.
if __name__ == '__main__':
    main()
    test()
