import json
import sys
from match import has_answer, SimpleTokenizer
from tqdm import tqdm

# Compute retrieval recall@k over a JSON dataset: the fraction of examples for
# which at least one of the first `topk` retrieved contexts is flagged by
# `has_answer` as containing one of the example's answers.
#
# Usage: python <this script> TOPK
#
# Each example is read through its 'answers' and 'ctxs' keys, and each context
# through its 'text' key. The exact matching rule is whatever
# `match.has_answer` implements (not visible here).
tok = SimpleTokenizer()

# Flip by hand to switch between the train and dev split.
train = True

# Validate the command-line argument *before* loading the dataset so that a
# bad invocation fails immediately instead of after a slow JSON load.
if len(sys.argv) < 2:
    sys.exit('usage: {} TOPK'.format(sys.argv[0]))
try:
    topk = int(sys.argv[1])
except ValueError:
    sys.exit('TOPK must be an integer, got {!r}'.format(sys.argv[1]))
if topk < 0:
    # A negative value would make ctxs[:topk] mean "all but the last |k|",
    # which silently produces a meaningless number.
    sys.exit('TOPK must be non-negative, got {}'.format(topk))

if train:
    data_path = '/data/timchen0618/open_domain_data/TQA/train.json'
else:
    data_path = '/data/timchen0618/open_domain_data/TQA/dev.json'

# Use a context manager so the file handle is closed deterministically.
with open(data_path, encoding='utf-8') as data_file:
    data = json.load(data_file)

if not data:
    # Recall is undefined on an empty dataset; avoid a ZeroDivisionError.
    sys.exit('no examples found in {}'.format(data_path))

recall = 0
for x in tqdm(data):
    ans = x['answers']
    ctxs = x['ctxs']
    # any() short-circuits on the first matching context, like the original
    # flag-and-break loop.
    if any(has_answer(ans, c['text'], tok) for c in ctxs[:topk]):
        recall += 1
print(float(recall) / len(data))
