import sys
import os

# Resolve the directory two levels above this file's directory, echo it, and
# add it to the import search path (presumably the project root, so that the
# `src.*` import below resolves when the script is run directly).
_project_root = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "..", "..")
)
print(_project_root)
sys.path.append(_project_root)

from src.util.load_config import get_config, print_config
from datasets import load_dataset

# Filter the "train" split of the configured root dataset down to the rows
# whose configured column equals the configured value, then push the result
# to the Hub repository "<working_organization>/<subset_name>".

# get the configuration
config = get_config()
print_config(config)

# Read every required setting up front: a missing key raises KeyError here,
# before the dataset is loaded and filtered, instead of part-way through.
root_dataset = config["root_dataset"]
column_to_filter = config["column_to_filter"]
filter_value = config["filter_value"]
target_repo = config["working_organization"] + "/" + config["subset_name"]

# load the "train" split of the root dataset
data = load_dataset(root_dataset, split="train")

# keep only the rows whose value in the chosen column equals the filter value
# (the settings were read once above, so nothing is looked up again per row)
data = data.filter(lambda x: x[column_to_filter] == filter_value)

# push the filtered data to the working repository on the HF Hub
data.push_to_hub(target_repo)
