# Merge several subreddit -> domain -> count maps (JSON outputs of
# merge_post_urls.py) into a single map, summing the counts.

from pathlib import Path
import argparse
import json
from collections import defaultdict

def merge_sub2domain_maps(paths):
    """Merge several subreddit -> domain -> count maps into one.

    Each file in *paths* is parsed as JSON and is expected to hold an object
    mapping a subreddit name to an object mapping a domain to a count.
    Counts for the same (subreddit, domain) pair are summed across files.

    Args:
        paths: Iterable of paths (``str`` or ``os.PathLike``) to JSON files.

    Returns:
        A ``defaultdict`` whose values are ``defaultdict(int)`` instances
        holding the summed counts. An empty iterable yields an empty mapping.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    merged = defaultdict(lambda: defaultdict(int))
    for path in paths:
        # JSON is UTF-8 by specification; relying on the platform default
        # encoding can fail or mis-decode non-ASCII names on some systems.
        with open(path, encoding='utf-8') as f:
            partial = json.load(f)
        # The handle is released before merging; only the parsed data is needed.
        for subreddit, domain_counts in partial.items():
            for domain, count in domain_counts.items():
                merged[subreddit][domain] += count
    return merged

def main():
    """Merge every ``*.json`` map in ``--input_dir`` into ``--output_file``.

    Command-line arguments:
        --input_dir: Directory whose top-level ``*.json`` files are merged.
        --output_file: Path of the JSON file that receives the merged map.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Merge several subreddit to domain maps')
    parser.add_argument('--input_dir', required=True, help='Path to the directory containing the input JSON files')
    parser.add_argument('--output_file', required=True, help='Path to the output JSON file')
    args = parser.parse_args()

    input_dir = Path(args.input_dir)
    output_path = Path(args.output_file)
    resolved_output = output_path.resolve()

    # Sort for a deterministic merge (and therefore output key) order, since
    # glob order depends on the filesystem. Skip the output file itself so a
    # re-run with the output inside input_dir does not double-count a
    # previous result.
    input_files = sorted(
        candidate
        for candidate in input_dir.glob('*.json')
        if candidate.resolve() != resolved_output
    )

    sub2domain = merge_sub2domain_maps(input_files)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(sub2domain, f)

# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

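# Example usage (NOTE: the script name below matches the producer script named
# in the header comment; substitute this file's actual path if it differs):
#   python data/merge_post_urls.py --input_dir /mnt/e/reddit/sub2domain_maps --output_file sub2domain.json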