#
# BenchIE: A Framework for Multi-Faceted Fact-Based Open Information Extraction Evaluation
#
# File: reproduce_experiments.ipynb
#
# Authors: Deleted for purposes of anonymity
#
# Proprietor: Deleted for purposes of anonymity --- PROPRIETARY INFORMATION
#
# The software and its source code contain valuable trade secrets and shall be maintained in
# confidence and treated as confidential information. The software may only be used for
# evaluation and/or testing purposes, unless otherwise explicitly stated in the terms of a
# license agreement or nondisclosure agreement with the proprietor of the software.
# Any unauthorized publication, transfer to third parties, or duplication of the object or
# source code---either totally or in part---is strictly prohibited.
#
# Copyright (c) 2021 Proprietor: Deleted for purposes of anonymity
# All Rights Reserved.
#
# THE PROPRIETOR DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS FOR A PARTICULAR PURPOSE AND THE WARRANTY AGAINST LATENT
# DEFECTS, WITH RESPECT TO THE PROGRAM AND ANY ACCOMPANYING DOCUMENTATION.
#
# NO LIABILITY FOR CONSEQUENTIAL DAMAGES:
# IN NO EVENT SHALL THE PROPRIETOR OR ANY OF ITS SUBSIDIARIES BE
# LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES
# FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF INFORMATION, OR
# OTHER PECUNIARY LOSS AND INDIRECT, CONSEQUENTIAL, INCIDENTAL,
# ECONOMIC OR PUNITIVE DAMAGES) ARISING OUT OF THE USE OF OR INABILITY
# TO USE THIS PROGRAM, EVEN IF the proprietor HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.
#
# For purposes of anonymity, the identity of the proprietor is not given herewith.
# The identity of the proprietor will be given once the review of the
# conference submission is completed.
#
# THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY.
#
from benchie import Benchie
from scores import Scores
import numpy as np
import pdb
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import copy
import utils
import stanza
import matplotlib.font_manager as font_manager
# Input variables
gold_annotations_filename = "data/gold/benchie_gold_annotations_en_with_sent_id.txt"
gold_annotations_ent_filename = "data/gold/benchie_gold_annotations_en_ent_with_sent_id.txt"
clausie_extractions_filename = "data/oie_systems_explicit_extractions/clausie_explicit.txt"
minie_extractions_filename = "data/oie_systems_explicit_extractions/minie_explicit.txt"
stanford_extractions_filename = "data/oie_systems_explicit_extractions/stanford_explicit.txt"
openie6_extractions_filename = "data/oie_systems_explicit_extractions/openie6_explicit.txt"
roie_extractions_filename = "data/oie_systems_explicit_extractions/roi_explicit.txt"
m2oie_extraction_file = "data/oie_systems_explicit_extractions/m2oie_en_explicit.txt"
naive_extractions_filename = "data/oie_systems_explicit_extractions/naive_oie_extractions.txt"
gold_annotation_file_zh = "data/gold/benchie_gold_annotations_zh_with_sent_id.txt"
multi2oie_extractions_zh_file = "data/oie_systems_explicit_extractions/m2oie_zh_explicit.txt"
gold_annotation_file_de = "data/gold/benchie_gold_annotations_de_with_sent_id.txt"
multi2oie_extractions_de_file = "data/oie_systems_explicit_extractions/m2oie_de_explicit.txt"
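# Optional sanity check (not part of the original notebook): fail early with a clear
# message if any of the input files listed above is missing. Only the file paths
# defined above and the standard library are used here.
import os
for input_file in [gold_annotations_filename, gold_annotations_ent_filename,
                   clausie_extractions_filename, minie_extractions_filename,
                   stanford_extractions_filename, openie6_extractions_filename,
                   roie_extractions_filename, m2oie_extraction_file,
                   naive_extractions_filename, gold_annotation_file_zh,
                   multi2oie_extractions_zh_file, gold_annotation_file_de,
                   multi2oie_extractions_de_file]:
    assert os.path.isfile(input_file), "Missing input file: " + input_file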
def add_oie_systems(benchie):
"""
    Add the extractions of all OIE systems evaluated in the paper to the given Benchie object.
"""
benchie.add_oie_system_extractions(oie_system_name="ClausIE", filename=clausie_extractions_filename)
benchie.add_oie_system_extractions(oie_system_name="MinIE", filename=minie_extractions_filename)
benchie.add_oie_system_extractions(oie_system_name="Stanford", filename=stanford_extractions_filename)
benchie.add_oie_system_extractions(oie_system_name="OpenIE6", filename=openie6_extractions_filename)
benchie.add_oie_system_extractions(oie_system_name="ROIE", filename=roie_extractions_filename)
benchie.add_oie_system_extractions(oie_system_name="M2OIE (EN)", filename=m2oie_extraction_file)
benchie.add_oie_system_extractions(oie_system_name="Naive OIE", filename=naive_extractions_filename)
# Load BenchIE and its facets
oie_systems = ['Naive OIE', 'ClausIE', 'MinIE', 'Stanford', 'ROIE', 'OpenIE6', 'M2OIE (EN)']
# Define BenchIE
print("Loading BenchIE ... ")
benchie = Benchie()
benchie.load_gold_annotations(filename=gold_annotations_filename)
add_oie_systems(benchie)
print("Loading BenchIE ZH ... ")
benchie_zh = Benchie()
benchie_zh.load_gold_annotations(filename=gold_annotation_file_zh)
benchie_zh.add_oie_system_extractions(oie_system_name="multi2oie_zh", filename=multi2oie_extractions_zh_file)
print("Loading BenchIE DE ... ")
benchie_de = Benchie()
benchie_de.load_gold_annotations(filename=gold_annotation_file_de)
benchie_de.add_oie_system_extractions(oie_system_name="multi2oie_de", filename=multi2oie_extractions_de_file)
print("Loading BenchIE-E ... ")
benchie_e = Benchie()
benchie_e.load_gold_annotations(filename=gold_annotations_ent_filename)
add_oie_systems(benchie_e)
print("Loading BenchIE-C ...")
benchie_c = Benchie()
benchie_c.load_gold_annotations(filename=gold_annotations_filename)
add_oie_systems(benchie_c)
print("Loading BenchIE-M ... ")
benchie_min = Benchie()
benchie_min.load_gold_annotations(filename=gold_annotations_filename, load_mode="minimal")
add_oie_systems(benchie_min)
print("Loading done!")
Loading BenchIE ...
Loading BenchIE ZH ...
Loading BenchIE DE ...
Loading BenchIE-E ...
Loading BenchIE-C ...
Loading BenchIE-M ...
Loading done!
def triple2string(triple):
return triple[0] + "\t" + triple[1] + "\t" + triple[2]
def compute_stats(benchie_v):
    """
    Print basic gold-annotation statistics for a Benchie object: the total number of
    distinct gold extractions, the number of triple synsets, and the average number
    of extractions per synset.
    """
    g_annotations = benchie_v.gold_annotations.golden_annotations
    triple_synset_count = 0
    extractions_count = 0
    sent_count = len(benchie_v.gold_annotations.sentences)
    extraction_length = []
for sent_id in g_annotations:
triple_synset_count += len(g_annotations[sent_id])
for synset in g_annotations[sent_id]:
extractions_set = set()
for triple in synset:
triple_length = 0
triple_length += len(triple[0].split(" "))
triple_length += len(triple[1].split(" "))
triple_length += len(triple[2].split(" "))
extraction_length.append(triple_length)
extractions_set.add(triple2string(triple))
extractions_count += len(extractions_set)
print("Total number of extractions: " + str(extractions_count))
print("Total number of triple synset: " + str(triple_synset_count))
print("Number of extractions per triple synset: " + str(round(extractions_count/triple_synset_count, 1)))
print("Stats for English:")
compute_stats(benchie)
print("-------------------")
print("Stats for German:")
compute_stats(benchie_de)
print("-------------------")
print("Stats for Chinese:")
compute_stats(benchie_zh)
Stats for English:
Total number of extractions: 136357
Total number of triple synsets: 1350
Number of extractions per triple synset: 101.0
-------------------
Stats for German:
Total number of extractions: 82260
Total number of triple synsets: 1086
Number of extractions per triple synset: 75.7
-------------------
Stats for Chinese:
Total number of extractions: 5318
Total number of triple synsets: 994
Number of extractions per triple synset: 5.4
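# compute_stats also collects per-triple token lengths (extraction_length) but does not
# report them. The sketch below (not part of the original notebook) prints the average
# gold triple length in tokens, iterating the same golden_annotations structure;
# whitespace splitting is only a rough proxy for Chinese.
def average_triple_length(benchie_v):
    # Average number of tokens per gold triple (subject + relation + object).
    lengths = []
    for synsets in benchie_v.gold_annotations.golden_annotations.values():
        for synset in synsets:
            for triple in synset:
                lengths.append(len(triple[0].split(" ")) + len(triple[1].split(" ")) + len(triple[2].split(" ")))
    return sum(lengths) / len(lengths) if lengths else 0.0

print("Avg. gold triple length (EN): " + str(round(average_triple_length(benchie), 1)))
print("Avg. gold triple length (DE): " + str(round(average_triple_length(benchie_de), 1)))
print("Avg. gold triple length (ZH): " + str(round(average_triple_length(benchie_zh), 1)))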
# Hard-coded CaRB scores for each OIE system, used for comparison with BenchIE
print("\nCaRB scores")
print("------------------")
carb_scores_clausie = Scores()
carb_scores_clausie.precision = 0.58
carb_scores_clausie.recall = 0.53
carb_scores_clausie.f1 = 0.56
carb_scores_clausie.print_scores("ClausIE")
carb_scores_minie = Scores()
carb_scores_minie.precision = 0.45
carb_scores_minie.recall = 0.44
carb_scores_minie.f1 = 0.44
carb_scores_minie.print_scores("MinIE")
carb_scores_stanford = Scores()
carb_scores_stanford.precision = 0.17
carb_scores_stanford.recall = 0.28
carb_scores_stanford.f1 = 0.22
carb_scores_stanford.print_scores("Stanford")
carb_scores_roie_n = Scores()
carb_scores_roie_n.precision = 0.44
carb_scores_roie_n.recall = 0.60
carb_scores_roie_n.f1 = 0.51
carb_scores_roie_n.print_scores("ROIE")
carb_scores_oie6 = Scores()
carb_scores_oie6.precision = 0.48
carb_scores_oie6.recall = 0.67
carb_scores_oie6.f1 = 0.56
carb_scores_oie6.print_scores("OpenIE6")
carb_scores_naive = Scores()
carb_scores_naive.precision = 0.19
carb_scores_naive.recall = 0.70
carb_scores_naive.f1 = 0.35
carb_scores_naive.print_scores("Naive OIE")
carb_scores_m2oie_en = Scores()
carb_scores_m2oie_en.precision = 0.60
carb_scores_m2oie_en.recall = 0.61
carb_scores_m2oie_en.f1 = 0.61
carb_scores_m2oie_en.print_scores("M2OIE (EN)")
carb_scores = {}
carb_scores['ClausIE'] = carb_scores_clausie
carb_scores['MinIE'] = carb_scores_minie
carb_scores['Stanford'] = carb_scores_stanford
carb_scores['OpenIE6'] = carb_scores_oie6
carb_scores['ROIE'] = carb_scores_roie_n
carb_scores['M2OIE (EN)'] = carb_scores_m2oie_en
carb_scores['Naive OIE'] = carb_scores_naive
CaRB scores
------------------
ClausIE precision: 0.58
ClausIE recall: 0.53
ClausIE f1: 0.56
===============
MinIE precision: 0.45
MinIE recall: 0.44
MinIE f1: 0.44
===============
Stanford precision: 0.17
Stanford recall: 0.28
Stanford f1: 0.22
===============
ROIE precision: 0.44
ROIE recall: 0.6
ROIE f1: 0.51
===============
OpenIE6 precision: 0.48
OpenIE6 recall: 0.67
OpenIE6 f1: 0.56
===============
Naive OIE precision: 0.19
Naive OIE recall: 0.7
Naive OIE f1: 0.35
===============
M2OIE (EN) precision: 0.6
M2OIE (EN) recall: 0.61
M2OIE (EN) f1: 0.61
===============
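# The cell above sets each CaRB reference score by hand. The sketch below is an
# equivalent, more compact way to populate the same dictionary (values copied from
# the cell above); like the original cell, it assumes a Scores object only needs its
# precision/recall/f1 attributes set before print_scores is called.
carb_raw = {
    'ClausIE':    (0.58, 0.53, 0.56),
    'MinIE':      (0.45, 0.44, 0.44),
    'Stanford':   (0.17, 0.28, 0.22),
    'OpenIE6':    (0.48, 0.67, 0.56),
    'ROIE':       (0.44, 0.60, 0.51),
    'M2OIE (EN)': (0.60, 0.61, 0.61),
    'Naive OIE':  (0.19, 0.70, 0.35),
}
carb_scores = {}
for system, (p, r, f1) in carb_raw.items():
    s = Scores()
    s.precision, s.recall, s.f1 = p, r, f1
    carb_scores[system] = s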
print("BenchIE scores")
print("-----------------")
benchie.compute_scores()
benchie.print_scores()
print("\nBenchIE (ZH) scores")
print("-----------------")
benchie_zh.compute_scores()
benchie_zh.print_scores()
print("\nBenchIE (DE) scores")
print("-----------------")
benchie_de.compute_scores()
benchie_de.print_scores()
BenchIE scores
-----------------
ClausIE precision: 0.5
ClausIE recall: 0.26
ClausIE f1: 0.34
===============
MinIE precision: 0.43
MinIE recall: 0.28
MinIE f1: 0.34
===============
Stanford precision: 0.11
Stanford recall: 0.16
Stanford f1: 0.13
===============
OpenIE6 precision: 0.31
OpenIE6 recall: 0.21
OpenIE6 f1: 0.25
===============
ROIE precision: 0.2
ROIE recall: 0.09
ROIE f1: 0.13
===============
M2OIE (EN) precision: 0.39
M2OIE (EN) recall: 0.16
M2OIE (EN) f1: 0.23
===============
Naive OIE precision: 0.03
Naive OIE recall: 0.02
Naive OIE f1: 0.03
===============

BenchIE (ZH) scores
-----------------
multi2oie_zh precision: 0.26
multi2oie_zh recall: 0.13
multi2oie_zh f1: 0.17
===============

BenchIE (DE) scores
-----------------
multi2oie_de precision: 0.09
multi2oie_de recall: 0.03
multi2oie_de f1: 0.04
===============
gold_annotations = benchie.gold_annotations.golden_annotations
err_stats = {}
for oie_system in oie_systems:
err_stats[oie_system] = {}
for oie_system in oie_systems:
print("Computing error stats for " + oie_system + " ...")
total_error_count = 0
slots_err_count = {}
slots_err_count["[0 0 0]"] = 0
slots_err_count["[0 0 1]"] = 0
slots_err_count["[0 1 0]"] = 0
slots_err_count["[0 1 1]"] = 0
slots_err_count["[1 0 0]"] = 0
slots_err_count["[1 0 1]"] = 0
slots_err_count["[1 1 0]"] = 0
# Count the exact error slots
for tup in benchie.oie_system_extractions[oie_system].extractions:
triple = tup[1:]
sent_id = tup[0]
slot_match_stats = utils.get_slot_matches_stats(triple, sent_id, gold_annotations)
# Skip the correct extractions (they match on all 3 slots)
if slot_match_stats['max_match'] == 3:
continue
else:
match_slots = slot_match_stats['max_match_slots']
if len(match_slots) >= 1:
unique_match_slots = set()
for slots in match_slots:
unique_match_slots.update([str(slots)])
for slots in unique_match_slots:
total_error_count += 1
slots_err_count[slots] += 1
# Populate the error stats dict
err_stats[oie_system]['(0, 0, 0)'] = slots_err_count['[0 0 0]'] / total_error_count
err_stats[oie_system]['(0, 0, 1)'] = slots_err_count['[0 0 1]'] / total_error_count
err_stats[oie_system]['(0, 1, 0)'] = slots_err_count['[0 1 0]'] / total_error_count
err_stats[oie_system]['(0, 1, 1)'] = slots_err_count['[0 1 1]'] / total_error_count
err_stats[oie_system]['(1, 0, 0)'] = slots_err_count['[1 0 0]'] / total_error_count
err_stats[oie_system]['(1, 0, 1)'] = slots_err_count['[1 0 1]'] / total_error_count
err_stats[oie_system]['(1, 1, 0)'] = slots_err_count['[1 1 0]'] / total_error_count
err_stats[oie_system]['subj_err_count'] = slots_err_count['[0 0 0]'] + slots_err_count['[0 0 1]'] + slots_err_count['[0 1 0]'] + slots_err_count['[0 1 1]']
err_stats[oie_system]['rel_err_count'] = slots_err_count['[0 0 0]'] + slots_err_count['[0 0 1]'] + slots_err_count['[1 0 0]'] + slots_err_count['[1 0 1]']
err_stats[oie_system]['obj_err_count'] = slots_err_count['[0 0 0]'] + slots_err_count['[0 1 0]'] + slots_err_count['[1 0 0]'] + slots_err_count['[1 1 0]']
err_stats[oie_system]['subj_err_frac'] = err_stats[oie_system]['subj_err_count'] / total_error_count
err_stats[oie_system]['rel_err_frac'] = err_stats[oie_system]['rel_err_count'] / total_error_count
err_stats[oie_system]['obj_err_frac'] = err_stats[oie_system]['obj_err_count'] / total_error_count
err_stats[oie_system]['errors_sum'] = total_error_count
print("\nDone!")
Computing error stats for Naive OIE ...
Computing error stats for ClausIE ...
Computing error stats for MinIE ...
Computing error stats for Stanford ...
Computing error stats for ROIE ...
Computing error stats for OpenIE6 ...
Computing error stats for M2OIE (EN) ...

Done!
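# Optional sketch (not part of the original notebook): show the per-slot error
# fractions computed above as a table before plotting them, using the pattern keys
# populated in err_stats and the pandas import from the top of the notebook.
pattern_keys = ['(0, 0, 0)', '(0, 0, 1)', '(0, 1, 0)', '(0, 1, 1)',
                '(1, 0, 0)', '(1, 0, 1)', '(1, 1, 0)']
err_table = pd.DataFrame(
    [[round(err_stats[oie_system][key], 2) for key in pattern_keys] for oie_system in oie_systems],
    index=oie_systems, columns=pattern_keys)
print(err_table)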
def plot_slot_errors(error_stats, oie_systems):
slot_errs = np.zeros((len(oie_systems), 3))
    for i, oie_system in enumerate(oie_systems):
        slot_errs[i, 0] = error_stats[oie_system]['subj_err_frac']
        slot_errs[i, 1] = error_stats[oie_system]['rel_err_frac']
        slot_errs[i, 2] = error_stats[oie_system]['obj_err_frac']
X = np.arange(len(oie_systems))
fig, ax = plt.subplots(figsize=(6, 3))
plt.xticks(range(len(oie_systems)), oie_systems, size='large', rotation=20, weight="bold")
plt.yticks(weight='bold', size='large')
plt.bar(X + 0.00, slot_errs[:,0], color = 'g', width = 0.25, label='subject')
plt.bar(X + 0.25, slot_errs[:,1], color = 'r', width = 0.25, label="relation")
plt.bar(X + 0.50, slot_errs[:,2], color = 'b', width = 0.25, label="object")
font = font_manager.FontProperties(weight='bold')
plt.legend(bbox_to_anchor=(1.27, 1.05),loc='upper right', prop=font)
plot_slot_errors(err_stats, oie_systems)
def plot_per_slot_errors(error_stats, oie_systems_names):
sns.set(font_scale=1.3)
heat_map_values = np.zeros((len(error_stats), 7))
#per_slot_error_keys = sorted(error_stats[0].keys())
per_slot_error_keys = [
'(0, 0, 0)',
'(0, 0, 1)',
'(0, 1, 0)',
'(0, 1, 1)',
'(1, 0, 0)',
'(1, 0, 1)',
'(1, 1, 0)'
]
for i in range(len(oie_systems_names)):
oie_system = oie_systems_names[i]
for j in range(len(per_slot_error_keys)):
pse_key = per_slot_error_keys[j] #PSE = Per-Slot Error
heat_map_values[i, j] = np.round(error_stats[oie_system][pse_key], 2)
pd_h_map = pd.DataFrame(heat_map_values, columns=per_slot_error_keys, index=oie_systems_names)
sns.set(font_scale=1.3)
fig, ax = plt.subplots(figsize=(5, 2.5))
sns.set_theme()
#ax = sns.heatmap(pd_h_map, vmin=0.0, vmax=1.0, center=1.0, annot=True, annot_kws={"weight": "bold"})
ax = sns.heatmap(pd_h_map, vmin=0.0, vmax=1.0, center=1.0, annot=True)
ax.tick_params(axis='x', rotation=25)
# Plot errors
plot_per_slot_errors(err_stats, oie_systems)
# Compute multi-faceted scores
print("Computing multi-faceted scores ... ")
print("Computing scores for BenchIE-E ... ")
benchie_e.compute_scores()
print("Computing scores for BenchIE-C ... ")
benchie_c.compute_scores(match_type='lexical')
print("Computing scores for BenchIE-M ... ")
benchie_min.compute_scores()
print("Done!")
Computing multi-faceted scores ...
Computing scores for BenchIE-E ...
Computing scores for BenchIE-C ...
Computing scores for BenchIE-M ...
Done!
def plot_scores(scores, oie_systems, plot_title):
p_scores = np.zeros((len(oie_systems), len(scores)))
r_scores = np.zeros((len(oie_systems), len(scores)))
f1_scores = np.zeros((len(oie_systems), len(scores)))
benchmarks = ['carb', 'benchie-concat', 'benchie-regular', 'benchie-entity', 'benchie-min']
    for j, bench in enumerate(benchmarks):
        for i in range(len(oie_systems)):
            p_scores[i][j] = scores[bench][oie_systems[i]].precision
            r_scores[i][j] = scores[bench][oie_systems[i]].recall
            f1_scores[i][j] = scores[bench][oie_systems[i]].f1
X = np.arange(len(oie_systems))
fig, ax = plt.subplots(figsize=(8, 4))
if plot_title == 'Precision':
plt.xticks(range(len(oie_systems)), oie_systems, size='large', rotation=18)
plt.bar(X + 0.00, p_scores[:,0], color = 'black', width = 0.15, label='CaRB')
plt.bar(X + 0.15, p_scores[:,1], color = 'g', width = 0.15, label='BenchIE-C')
plt.bar(X + 0.30, p_scores[:,2], color = 'r', width = 0.15, label='BenchIE')
plt.bar(X + 0.45, p_scores[:,3], color = 'b', width = 0.15, label='BenchIE-E')
plt.bar(X + 0.60, p_scores[:,4], color = 'orange', width = 0.15, label='BenchIE-M')
plt.title(plot_title)
plt.legend(bbox_to_anchor=(1.32, 1),loc='upper right')
elif plot_title == 'Recall':
plt.xticks(range(len(oie_systems)), oie_systems, size='large', rotation=20)
plt.bar(X + 0.00, r_scores[:,0], color = 'black', width = 0.15, label='CaRB')
plt.bar(X + 0.15, r_scores[:,1], color = 'g', width = 0.15, label='BenchIE-C')
plt.bar(X + 0.30, r_scores[:,2], color = 'r', width = 0.15, label='BenchIE')
plt.bar(X + 0.45, r_scores[:,3], color = 'b', width = 0.15, label='BenchIE-E')
plt.bar(X + 0.60, r_scores[:,4], color = 'orange', width = 0.15, label='BenchIE-M')
plt.title(plot_title)
plt.legend(bbox_to_anchor=(1.32, 1),loc='upper right')
elif plot_title == 'F1':
plt.xticks(range(len(oie_systems)), oie_systems, size='large', rotation=20)
plt.yticks(size='large')
plt.bar(X + 0.00, f1_scores[:,0], color = 'black', width = 0.15, label='CaRB')
plt.bar(X + 0.15, f1_scores[:,1], color = 'g', width = 0.15, label='BenchIE-C')
plt.bar(X + 0.30, f1_scores[:,2], color = 'r', width = 0.15, label='BenchIE')
plt.bar(X + 0.45, f1_scores[:,3], color = 'b', width = 0.15, label='BenchIE-E')
plt.bar(X + 0.60, f1_scores[:,4], color = 'orange', width = 0.15, label='BenchIE-M')
plt.title(plot_title)
font = font_manager.FontProperties(size=12)
plt.legend(bbox_to_anchor=(1.15, 1),loc='upper right', prop=font)
else:
print("Nothing to plot")
oie_systems_names = ["Naive OIE", "ClausIE", "MinIE", "Stanford", "ROIE", "OpenIE6", "M2OIE (EN)"]
scores = {}
scores['carb'] = carb_scores
scores['benchie-regular'] = benchie.scores
scores['benchie-entity'] = benchie_e.scores
scores['benchie-min'] = benchie_min.scores
scores['benchie-concat'] = benchie_c.scores
plot_scores(scores, oie_systems_names, 'F1')
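# Optional sketch (not part of the original notebook): the same F1 numbers that
# plot_scores visualizes, shown as a table with one row per OIE system and one
# column per benchmark facet, built from the `scores` dictionary defined above.
facets = ['carb', 'benchie-concat', 'benchie-regular', 'benchie-entity', 'benchie-min']
f1_table = pd.DataFrame(
    {facet: [round(scores[facet][oie_sys].f1, 2) for oie_sys in oie_systems_names] for facet in facets},
    index=oie_systems_names)
print(f1_table)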
def update_sent_length_bucket(s_id, sent, sent_length_buckets_ids):
sent_length = len(sent.tokens)
if sent_length <= 20:
sent_length_buckets_ids['<=20'].append(s_id)
elif sent_length > 20 and sent_length <= 30:
sent_length_buckets_ids['21-30'].append(s_id)
else:
sent_length_buckets_ids['>30'].append(s_id)
def update_conj_buckets(s_id, sent, cc_conj_buckets):
    # Count dependency relations in the sentence: key = deprel label, value = number of occurrences
dep_rels_count = {}
cc_counts = 0
conj_counts = 0
for word in sent.words:
if word.deprel not in dep_rels_count:
dep_rels_count[word.deprel] = 0
dep_rels_count[word.deprel] += 1
# Update cc dependencies counts
if 'cc' in dep_rels_count:
cc_counts += dep_rels_count['cc']
# Update conj dependencies counts
if 'conj' in dep_rels_count:
conj_counts += dep_rels_count['conj']
if cc_counts == 0:
cc_conj_buckets['cc']['0'].append(s_id)
else:
cc_conj_buckets['cc']['>=1'].append(s_id)
if conj_counts == 0:
cc_conj_buckets['conj']['0'].append(s_id)
else:
cc_conj_buckets['conj']['>=1'].append(s_id)
def update_case_buckets_ids(s_id, sent, buckets):
count = 0
for word in sent.words:
if word.deprel == 'case':
count+=1
if count <= 1:
buckets['<=1'].append(s_id)
elif count == 2:
buckets['2'].append(s_id)
elif count == 3:
buckets['3'].append(s_id)
elif count >= 4:
buckets['>=4'].append(s_id)
sent_length_buckets_ids = {
'<=20': [],
'21-30': [],
'>30': []
}
cc_conj_buckets_ids = {
'cc': {'0': [], '>=1': []},
'conj': {'0': [], '>=1': []}
}
case_buckets_ids = {
'<=1': [],
'2': [],
'3': [],
'>=4': []
}
sents = benchie.gold_annotations.sentences
nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma,depparse', tokenize_no_ssplit=True)
counter = 0
for s_id in sents:
counter += 1
if counter % 10 == 0:
print("Sent #:" + str(s_id) + " / 300")
s_nlp = nlp(sents[s_id])
sent = s_nlp.sentences[0]
update_sent_length_bucket(s_id, sent, sent_length_buckets_ids)
update_conj_buckets(s_id, sent, cc_conj_buckets_ids)
update_case_buckets_ids(s_id, sent, case_buckets_ids)
2021-11-15 11:33:50 WARNING: Can not find mwt: default from official model list. Ignoring it.
2021-11-15 11:33:50 INFO: Loading these models for language: en (English):
========================
| Processor | Package  |
------------------------
| tokenize  | combined |
| pos       | combined |
| lemma     | combined |
| depparse  | combined |
========================
2021-11-15 11:33:50 INFO: Use device: cpu
2021-11-15 11:33:50 INFO: Loading: tokenize
2021-11-15 11:33:50 INFO: Loading: pos
2021-11-15 11:33:51 INFO: Loading: lemma
2021-11-15 11:33:51 INFO: Loading: depparse
2021-11-15 11:33:52 INFO: Done loading processors!
Sent #:10 / 300 ... Sent #:300 / 300 (progress output, printed every 10 sentences)
print("Sent length buckets size (bucket, count): ")
sent_length_buckets = list(sent_length_buckets_ids.keys())
for id in sent_length_buckets:
print(id + "\t" +str(len(sent_length_buckets_ids[id])))
print("-----------------")
print("CONJ buckets size (bucket, count): ")
cc_conj_buckets = list(cc_conj_buckets_ids['conj'].keys())
for id in cc_conj_buckets_ids['conj']:
print(id + "\t" +str(len(cc_conj_buckets_ids['conj'][id])))
print("-----------------")
print("CONJ buckets size (bucket, count): ")
case_buckets = list(case_buckets_ids.keys())
for id in case_buckets_ids:
print(id + "\t" +str(len(case_buckets_ids[id])))
Sent length buckets size (bucket, count):
<=20	120
21-30	113
>30	67
-----------------
CONJ buckets size (bucket, count):
0	150
>=1	150
-----------------
Case buckets size (bucket, count):
<=1	75
2	62
3	71
>=4	92
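# Sanity check (not part of the original notebook): each bucketing criterion assigns
# every sentence to exactly one bucket, so the bucket sizes should sum to the total
# number of gold sentences (300 in the English corpus above).
n_sents = len(benchie.gold_annotations.sentences)
assert sum(len(ids) for ids in sent_length_buckets_ids.values()) == n_sents
assert sum(len(ids) for ids in cc_conj_buckets_ids['cc'].values()) == n_sents
assert sum(len(ids) for ids in cc_conj_buckets_ids['conj'].values()) == n_sents
assert sum(len(ids) for ids in case_buckets_ids.values()) == n_sents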
def get_scores(oie_systems_names, bucket_ids, bucket_names, benchie, round_num=2):
bucket_scores_p = np.zeros((len(oie_systems_names), len(bucket_ids)+1))
bucket_scores_r = np.zeros((len(oie_systems_names), len(bucket_ids)+1))
bucket_scores_f1 = np.zeros((len(oie_systems_names), len(bucket_ids)+1))
for i in range(len(oie_systems_names)):
oie_sys = oie_systems_names[i]
print("Computing buckets for oie system: " + oie_sys)
for j in range(len(bucket_names)):
bucket_id = bucket_names[j]
benchie_subset = benchie.get_subset(bucket_ids[bucket_id])
benchie_subset.compute_precision()
benchie_subset.compute_recall()
benchie_subset.compute_f1()
bucket_scores_p[i, j] = np.round(benchie_subset.scores[oie_sys].precision, round_num)
bucket_scores_r[i, j] = np.round(benchie_subset.scores[oie_sys].recall, round_num)
bucket_scores_f1[i, j] = np.round(benchie_subset.scores[oie_sys].f1, round_num)
#pdb.set_trace()
for i in range(len(oie_systems_names)):
oie_sys = oie_systems_names[i]
all_ind = len(bucket_ids)
bucket_scores_p[i, all_ind] = np.round(benchie.scores[oie_sys].precision, round_num)
bucket_scores_r[i, all_ind] = np.round(benchie.scores[oie_sys].recall, round_num)
bucket_scores_f1[i, all_ind] = np.round(benchie.scores[oie_sys].f1, round_num)
return bucket_scores_p, bucket_scores_r, bucket_scores_f1
def plot_heat_map(values, cols, rows, title):
"""
fig, ax = plt.subplots(figsize=(10,10)) # Sample figsize in inches
sns.heatmap(df1.iloc[:, 1:6:], annot=True, linewidths=.5, ax=ax)
"""
sns.set(font_scale=1.4)
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_title(title, fontdict= {'fontsize': 16, 'fontweight':'bold'})
pd_h_map = pd.DataFrame(values, columns=cols, index=rows)
sns.set_theme()
ax = sns.heatmap(pd_h_map, vmin=0.0, vmax=1.0, center=1.0, annot=True, annot_kws={"fontsize":16, "weight": "bold"})
# Computing scores for heatmap
sent_bucket_names = list(sent_length_buckets)
print("Compute scores for sentence length ... ")
buckets_sentl_p, buckets_sentl_r, buckets_sentl_f1 = get_scores(
oie_systems,
sent_length_buckets_ids,
sent_bucket_names,
benchie
)
sent_length_buckets.append("all")
print("------------")
print("Compute scores for conjs ...")
conj_bucket_names = list(cc_conj_buckets)
buckets_conj_p, buckets_conj_r, buckets_conj_f1 = get_scores(
oie_systems,
cc_conj_buckets_ids['conj'],
conj_bucket_names,
benchie
)
conj_bucket_names.append("all")
print("------------")
print("Compute scores for case markers ...")
case_bucket_names = list(case_buckets)
buckets_case_p, buckets_case_r, buckets_case_f1 = get_scores(
oie_systems,
case_buckets_ids,
case_bucket_names,
benchie
)
case_bucket_names.append("all")
Compute scores for sentence length ...
Computing buckets for oie system: Naive OIE
Computing buckets for oie system: ClausIE
Computing buckets for oie system: MinIE
Computing buckets for oie system: Stanford
Computing buckets for oie system: ROIE
Computing buckets for oie system: OpenIE6
Computing buckets for oie system: M2OIE (EN)
------------
Compute scores for conjs ...
Computing buckets for oie system: Naive OIE
Computing buckets for oie system: ClausIE
Computing buckets for oie system: MinIE
Computing buckets for oie system: Stanford
Computing buckets for oie system: ROIE
Computing buckets for oie system: OpenIE6
Computing buckets for oie system: M2OIE (EN)
------------
Compute scores for case markers ...
Computing buckets for oie system: Naive OIE
Computing buckets for oie system: ClausIE
Computing buckets for oie system: MinIE
Computing buckets for oie system: Stanford
Computing buckets for oie system: ROIE
Computing buckets for oie system: OpenIE6
Computing buckets for oie system: M2OIE (EN)
plot_heat_map(buckets_sentl_f1, sent_length_buckets, oie_systems, "F1")
plot_heat_map(buckets_conj_f1, conj_bucket_names, oie_systems, "F1")
plot_heat_map(buckets_case_f1, case_bucket_names, oie_systems, "F1")
plot_scores(scores, oie_systems_names, 'Precision')
plot_scores(scores, oie_systems_names, 'Recall')
plot_scores(scores, oie_systems_names, 'F1')