Tokenizer: bert-base-cased Model: bert-base-cased
	Data split: fullData
			------------EPOCH 1---------------
Loss:  tensor(4.3514, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.5772, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.0533, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.0969, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.8568, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.6292, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.6505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.5289, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.6113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.6671, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.4593, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.5885, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0308, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.1447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.7979, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.1347, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.3377, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.1572, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.8899, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.9603, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0779, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.9660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4837, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4403, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1246, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8657, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9559, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3884, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1075, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4764, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8337, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5882, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8091880341880342, 'B-C': 0.25, 'I-C': 0.3783783783783784, 'B-P': 0.41226353555120676, 'I-P': 0.6623302966927794, 'B-MC': 0.12962962962962962, 'I-MC': 0.23655913978494625}, 'recall': {'O': 0.8500561167227834, 'B-C': 0.0022026431718061676, 'I-C': 0.0020964360587002098, 'B-P': 0.5471861471861472, 'I-P': 0.9552658486707567, 'B-MC': 0.032407407407407406, 'I-MC': 0.04964539007092199}, 'f1': {'O': 0.8291187739463602, 'B-C': 0.004366812227074236, 'I-C': 0.004169769173492182, 'B-P': 0.47023809523809523, 'I-P': 0.782273764417743, 'B-MC': 0.05185185185185184, 'I-MC': 0.08206767918998135}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.6928812036829104, 'recall': 0.6928812036829104, 'f1': 0.6928812036829104, 'support': None}, 'macro_avg': {'precision': 0.4111927163178536, 'recall': 0.3484085698983605, 'f1': 0.31772667800637117, 'support': None}, 'weighted_avg': {'precision': 0.620890642852441, 'recall': 0.6928812036829104, 'f1': 0.6112996729832283, 'support': None}}
			------------EPOCH 2---------------
Loss:  tensor(2.4225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.5595, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4272, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4968, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3241, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3481, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.5454, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3014, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.5041, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0943, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1577, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1816, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0522, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3135, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0670, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0791, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1885, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0439, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0166, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1275, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3290, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6948, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8504, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9114, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1513, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3534, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4891, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9273000752068188, 'B-C': 0.3076923076923077, 'I-C': 0.36981132075471695, 'B-P': 0.4570411794611083, 'I-P': 0.7198359168540213, 'B-MC': 0.26878612716763006, 'I-MC': 0.35605731654363876}, 'recall': {'O': 0.8303030303030303, 'B-C': 0.01762114537444934, 'I-C': 0.014675052410901468, 'B-P': 0.7783549783549784, 'I-P': 0.9330265848670757, 'B-MC': 0.4305555555555556, 'I-MC': 0.5286911669890393}, 'f1': {'O': 0.8761250592136429, 'B-C': 0.03333333333333334, 'I-C': 0.028229871813337176, 'B-P': 0.5759128763613068, 'I-P': 0.812682296885089, 'B-MC': 0.33096085409252674, 'I-MC': 0.4255319148936171}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.720525488434763, 'recall': 0.720525488434763, 'f1': 0.720525488434763, 'support': None}, 'macro_avg': {'precision': 0.48664632052574885, 'recall': 0.5047467876935757, 'f1': 0.44039660094183614, 'support': None}, 'weighted_avg': {'precision': 0.6910639682946385, 'recall': 0.720525488434763, 'f1': 0.6706893437558225, 'support': None}}
			------------EPOCH 3---------------
Loss:  tensor(1.7855, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8300, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7884, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8760, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6426, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7716, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7171, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7411, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9907, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6019, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9199, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6067, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5994, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5101, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6175, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4944, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6960, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2576, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2558, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9550, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3224, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8970, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0964, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3897, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9408521303258145, 'B-C': 0.3993174061433447, 'I-C': 0.5146882494004796, 'B-P': 0.569774919614148, 'I-P': 0.8048907388137357, 'B-MC': 0.32461873638344224, 'I-MC': 0.40969696969696967}, 'recall': {'O': 0.8426487093153759, 'B-C': 0.2577092511013216, 'I-C': 0.25711290805630427, 'B-P': 0.7670995670995671, 'I-P': 0.8699897750511247, 'B-MC': 0.6898148148148148, 'I-MC': 0.7627337201805287}, 'f1': {'O': 0.8890467732386027, 'B-C': 0.3132530120481927, 'I-C': 0.34291991212302775, 'B-P': 0.6538745387453875, 'I-P': 0.836175126529409, 'B-MC': 0.4414814814814815, 'I-MC': 0.5330629717246818}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7526162137884572, 'recall': 0.7526162137884572, 'f1': 0.7526162137884572, 'support': None}, 'macro_avg': {'precision': 0.5662627357682763, 'recall': 0.6353012493741481, 'f1': 0.5728305451272547, 'support': None}, 'weighted_avg': {'precision': 0.7620843809589003, 'recall': 0.7526162137884572, 'f1': 0.7449826649543567, 'support': None}}
			------------EPOCH 4---------------
Loss:  tensor(1.2385, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2620, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2848, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4854, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1448, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4472, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3619, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3650, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4142, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2873, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1215, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4353, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0886, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3063, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0874, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3520, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2312, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1334, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1421, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3873, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1916, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8979, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6531, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3611, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8965, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4773, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6764, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9402, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3436, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9065295169946332, 'B-C': 0.38215488215488214, 'I-C': 0.4649610678531702, 'B-P': 0.6399662731871838, 'I-P': 0.8821426443424895, 'B-MC': 0.40100250626566414, 'I-MC': 0.49173387096774196}, 'recall': {'O': 0.9099887766554433, 'B-C': 0.5, 'I-C': 0.5007487271638215, 'B-P': 0.6571428571428571, 'I-P': 0.7569018404907976, 'B-MC': 0.7407407407407407, 'I-MC': 0.7862669245647969}, 'f1': {'O': 0.9082558530301332, 'B-C': 0.433206106870229, 'I-C': 0.48219178082191777, 'B-P': 0.6484408372490389, 'I-P': 0.81473736345376, 'B-MC': 0.5203252032520326, 'I-MC': 0.6050607789630366}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7611947002021109, 'recall': 0.7611947002021109, 'f1': 0.7611947002021109, 'support': None}, 'macro_avg': {'precision': 0.5954986802522522, 'recall': 0.6931128381083509, 'f1': 0.630316846234307, 'support': None}, 'weighted_avg': {'precision': 0.785989703575582, 'recall': 0.7611947002021109, 'f1': 0.7686973397040096, 'support': None}}
			------------EPOCH 5---------------
Loss:  tensor(1.1132, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0286, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0158, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0516, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1956, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0500, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1764, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8780, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9461, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7938, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1791, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8518, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1289, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9614, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2621, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0390, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8679, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1865, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1205, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6895, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5345, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3493, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4527, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6852, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8582, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2788, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9505526959750232, 'B-C': 0.3897849462365591, 'I-C': 0.45773363065071965, 'B-P': 0.7147117296222664, 'I-P': 0.9058898847631242, 'B-MC': 0.39195979899497485, 'I-MC': 0.5000991080277503}, 'recall': {'O': 0.8428731762065096, 'B-C': 0.6387665198237885, 'I-C': 0.6762503743635819, 'B-P': 0.6225108225108225, 'I-P': 0.7234151329243353, 'B-MC': 0.7222222222222222, 'I-MC': 0.8133462282398453}, 'f1': {'O': 0.8934803299492385, 'B-C': 0.48414023372287146, 'I-C': 0.5459381044487428, 'B-P': 0.6654326700601574, 'I-P': 0.8044343376918703, 'B-MC': 0.50814332247557, 'I-MC': 0.6193690929176384}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7549741747136762, 'recall': 0.7549741747136762, 'f1': 0.7549741747136761, 'support': None}, 'macro_avg': {'precision': 0.6158188277529169, 'recall': 0.7199120680415864, 'f1': 0.6458482987522985, 'support': None}, 'weighted_avg': {'precision': 0.8111051712586684, 'recall': 0.7549741747136762, 'f1': 0.7711945025925929, 'support': None}}
			------------EPOCH 6---------------
Loss:  tensor(0.8379, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6858, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0405, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6287, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9527, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7392, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9667, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6174, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7772, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6188, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6491, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9560, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6966, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0174, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8586, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6653, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6183, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0392, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0761, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3845, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3566, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5884, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2500, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9185000378014667, 'B-C': 0.40634441087613293, 'I-C': 0.506254786826653, 'B-P': 0.6855241264559068, 'I-P': 0.8739453818827708, 'B-MC': 0.4891304347826087, 'I-MC': 0.6417144581949894}, 'recall': {'O': 0.9090160867938646, 'B-C': 0.5925110132158591, 'I-C': 0.5938903863432166, 'B-P': 0.7134199134199134, 'I-P': 0.804959100204499, 'B-MC': 0.625, 'I-MC': 0.685364281108962}, 'f1': {'O': 0.9137334536702767, 'B-C': 0.48207885304659504, 'I-C': 0.5465821389195149, 'B-P': 0.6991938905388205, 'I-P': 0.8380349159037683, 'B-MC': 0.5487804878048781, 'I-MC': 0.66282151208106}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7907927240062879, 'recall': 0.7907927240062879, 'f1': 0.7907927240062879, 'support': None}, 'macro_avg': {'precision': 0.645916233831504, 'recall': 0.7034515401551877, 'f1': 0.6701750359949877, 'support': None}, 'weighted_avg': {'precision': 0.8044779989467824, 'recall': 0.7907927240062879, 'f1': 0.7962076745066756, 'support': None}}
			------------EPOCH 7---------------
Loss:  tensor(0.5521, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5514, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6095, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6384, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6841, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0933, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5754, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9381, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5216, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5570, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4064, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5339, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7639, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6191, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7095, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6980, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6308, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6787, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6766, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8443, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0650, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4628, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3713, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5131, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2311, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3483, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5928, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1825, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9453494054406255, 'B-C': 0.48291571753986334, 'I-C': 0.5423538230884558, 'B-P': 0.5938256658595642, 'I-P': 0.7860250721578426, 'B-MC': 0.7054263565891473, 'I-MC': 0.8624409979770735}, 'recall': {'O': 0.8684624017957351, 'B-C': 0.4669603524229075, 'I-C': 0.4333632824198862, 'B-P': 0.8493506493506493, 'I-P': 0.9328220858895706, 'B-MC': 0.4212962962962963, 'I-MC': 0.41231463571889104}, 'f1': {'O': 0.9052762937253831, 'B-C': 0.4748040313549832, 'I-C': 0.48177126685533544, 'B-P': 0.6989668685429284, 'I-P': 0.8531550277043929, 'B-MC': 0.5275362318840581, 'I-MC': 0.5579062159214832}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7929485739950595, 'recall': 0.7929485739950595, 'f1': 0.7929485739950595, 'support': None}, 'macro_avg': {'precision': 0.7026195769503676, 'recall': 0.6263671005562765, 'f1': 0.6427737051412236, 'support': None}, 'weighted_avg': {'precision': 0.7941581067658905, 'recall': 0.7929485739950595, 'f1': 0.783099914639013, 'support': None}}
			------------EPOCH 8---------------
Loss:  tensor(0.5717, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5213, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3824, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9164, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7616, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4266, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4500, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3801, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4339, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6074, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3969, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8078, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4382, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3761, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2997, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7510, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5865, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3974, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4690, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0631, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2412, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1571, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1229, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2733, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2808, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5782, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2027, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9280604453018246, 'B-C': 0.397677793904209, 'I-C': 0.4758565187399402, 'B-P': 0.7038664323374341, 'I-P': 0.8847952549332725, 'B-MC': 0.4684385382059801, 'I-MC': 0.6063287744703674}, 'recall': {'O': 0.8638982416760195, 'B-C': 0.6035242290748899, 'I-C': 0.6197963462114405, 'B-P': 0.6935064935064935, 'I-P': 0.7931492842535788, 'B-MC': 0.6527777777777778, 'I-MC': 0.7288845905867183}, 'f1': {'O': 0.894830659536542, 'B-C': 0.47944006999125116, 'I-C': 0.5383714880332986, 'B-P': 0.6986480593109463, 'I-P': 0.8364695098937834, 'B-MC': 0.5454545454545455, 'I-MC': 0.6619821402430098}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7787109813608803, 'recall': 0.7787109813608803, 'f1': 0.7787109813608803, 'support': None}, 'macro_avg': {'precision': 0.6378605368418612, 'recall': 0.7079338518695597, 'f1': 0.6650280674947682, 'support': None}, 'weighted_avg': {'precision': 0.8053765753546112, 'recall': 0.7787109813608803, 'f1': 0.7884996913641775, 'support': None}}
			------------EPOCH 9---------------
Loss:  tensor(0.3057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2696, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3107, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4378, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2500, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7298, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2840, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5915, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2400, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2791, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3112, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4497, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7250, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1707, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2813, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1811, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3950, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2095, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7569, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0216, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2267, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1567, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0916, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2450, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1402, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2493, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5658, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2031, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9375298804780876, 'B-C': 0.5, 'I-C': 0.5776284728527329, 'B-P': 0.7100401606425703, 'I-P': 0.8544577089154178, 'B-MC': 0.4281767955801105, 'I-MC': 0.526282457251425}, 'recall': {'O': 0.8803591470258136, 'B-C': 0.4889867841409692, 'I-C': 0.47634022162324047, 'B-P': 0.7653679653679654, 'I-P': 0.8599182004089979, 'B-MC': 0.7175925925925926, 'I-MC': 0.8036750483558994}, 'f1': {'O': 0.9080455334748215, 'B-C': 0.49443207126948774, 'I-C': 0.5221173574066474, 'B-P': 0.7366666666666667, 'I-P': 0.857179258504268, 'B-MC': 0.5363321799307958, 'I-MC': 0.6360505166475316}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7976869526162138, 'recall': 0.7976869526162138, 'f1': 0.7976869526162138, 'support': None}, 'macro_avg': {'precision': 0.6477307822457635, 'recall': 0.7131771370736397, 'f1': 0.6701176548428885, 'support': None}, 'weighted_avg': {'precision': 0.805587094912804, 'recall': 0.7976869526162138, 'f1': 0.7984135005133193, 'support': None}}
			------------EPOCH 10---------------
Loss:  tensor(0.1650, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1506, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1623, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3933, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1428, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1686, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5189, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1753, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1789, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1889, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4425, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1944, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6738, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1721, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2983, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1368, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6816, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4565, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1197, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6945, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0127, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1642, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0745, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0763, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1273, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0649, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1949, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5566, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1884, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.932199290551435, 'B-C': 0.49382716049382713, 'I-C': 0.5649411324985983, 'B-P': 0.7415929203539823, 'I-P': 0.8750607418605907, 'B-MC': 0.33613445378151263, 'I-MC': 0.43504725292327406}, 'recall': {'O': 0.8651702207257763, 'B-C': 0.44052863436123346, 'I-C': 0.452680443246481, 'B-P': 0.7255411255411255, 'I-P': 0.8285787321063395, 'B-MC': 0.7407407407407407, 'I-MC': 0.8755641521598968}, 'f1': {'O': 0.897434902402111, 'B-C': 0.46565774155995343, 'I-C': 0.5026186715437692, 'B-P': 0.7334792122538292, 'I-P': 0.8511856306294479, 'B-MC': 0.46242774566473993, 'I-MC': 0.5812734082397004}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7794071412530879, 'recall': 0.7794071412530879, 'f1': 0.779407141253088, 'support': None}, 'macro_avg': {'precision': 0.6255432789233172, 'recall': 0.7041148641259419, 'f1': 0.6420110446133644, 'support': None}, 'weighted_avg': {'precision': 0.8050879740915468, 'recall': 0.7794071412530879, 'f1': 0.7851216596570858, 'support': None}}
			------------EPOCH 11---------------
Loss:  tensor(0.1939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2542, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4345, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1170, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1088, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4548, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0910, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1240, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1345, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1408, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1181, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6892, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1520, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1350, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4507, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1244, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6604, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0495, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0937, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0740, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4996, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1516, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8862121988723731, 'B-C': 0.5050505050505051, 'I-C': 0.5803793830235677, 'B-P': 0.7378048780487805, 'I-P': 0.861970810475106, 'B-MC': 0.4437869822485207, 'I-MC': 0.5487640449438203}, 'recall': {'O': 0.9055742611298166, 'B-C': 0.44052863436123346, 'I-C': 0.4535789158430668, 'B-P': 0.7333333333333333, 'I-P': 0.851482617586912, 'B-MC': 0.6944444444444444, 'I-MC': 0.7872340425531915}, 'f1': {'O': 0.8957886166827029, 'B-C': 0.47058823529411764, 'I-C': 0.5092040010086577, 'B-P': 0.7355623100303951, 'I-P': 0.856694614474564, 'B-MC': 0.5415162454873645, 'I-MC': 0.6467161016949152}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7955535593981585, 'recall': 0.7955535593981585, 'f1': 0.7955535593981585, 'support': None}, 'macro_avg': {'precision': 0.6519955432375248, 'recall': 0.6951680356074285, 'f1': 0.6651528749532452, 'support': None}, 'weighted_avg': {'precision': 0.7963110314321673, 'recall': 0.7955535593981585, 'f1': 0.7930817027445352, 'support': None}}
			------------EPOCH 12---------------
Loss:  tensor(0.1324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1364, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2550, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4156, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1140, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5573, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1501, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3909, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1396, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1586, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1264, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3331, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1240, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6306, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0793, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1218, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6453, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1591, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1582, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6534, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0952, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0851, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1753, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4651, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1650, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8758100121056754, 'B-C': 0.3863275039745628, 'I-C': 0.45116772823779194, 'B-P': 0.7445414847161572, 'I-P': 0.8931446377564269, 'B-MC': 0.4207492795389049, 'I-MC': 0.5262041162741353}, 'recall': {'O': 0.9202394313505424, 'B-C': 0.5352422907488987, 'I-C': 0.5727762803234502, 'B-P': 0.5904761904761905, 'I-P': 0.7033742331288344, 'B-MC': 0.6759259259259259, 'I-MC': 0.7994842037395229}, 'f1': {'O': 0.8974751897256275, 'B-C': 0.4487534626038781, 'I-C': 0.504750593824228, 'B-P': 0.6586190246257846, 'I-P': 0.7869808946344812, 'B-MC': 0.5186500888099467, 'I-MC': 0.6346769033909149}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7507972153604312, 'recall': 0.7507972153604312, 'f1': 0.7507972153604312, 'support': None}, 'macro_avg': {'precision': 0.6139921089433792, 'recall': 0.6853597936704807, 'f1': 0.6357008796592659, 'support': None}, 'weighted_avg': {'precision': 0.7847858327014243, 'recall': 0.7507972153604312, 'f1': 0.7591300672413691, 'support': None}}
			------------EPOCH 13---------------
Loss:  tensor(0.2071, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2197, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3797, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2146, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6529, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4207, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0895, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0951, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0690, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0898, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2705, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0953, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1228, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3626, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1236, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5759, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3851, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1558, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1544, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6605, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0855, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1897, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0515, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1971, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4738, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1476, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9226804123711341, 'B-C': 0.3618721461187215, 'I-C': 0.4488122962272939, 'B-P': 0.6773888363292336, 'I-P': 0.9006685768863419, 'B-MC': 0.5236363636363637, 'I-MC': 0.6842105263157895}, 'recall': {'O': 0.8839506172839506, 'B-C': 0.698237885462555, 'I-C': 0.7214734950584007, 'B-P': 0.6199134199134199, 'I-P': 0.723159509202454, 'B-MC': 0.6666666666666666, 'I-MC': 0.6789168278529981}, 'f1': {'O': 0.9029003783102144, 'B-C': 0.4766917293233082, 'I-C': 0.5533796588755528, 'B-P': 0.647377938517179, 'I-P': 0.8022118247554233, 'B-MC': 0.5865580448065173, 'I-MC': 0.6815533980582524}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7648776105995958, 'recall': 0.7648776105995958, 'f1': 0.7648776105995958, 'support': None}, 'macro_avg': {'precision': 0.6456098796978397, 'recall': 0.7131883459200635, 'f1': 0.6643818532352067, 'support': None}, 'weighted_avg': {'precision': 0.8113197879388755, 'recall': 0.7648776105995958, 'f1': 0.778329462224524, 'support': None}}
			------------EPOCH 14---------------
Loss:  tensor(0.1469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1545, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1002, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3176, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0845, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0898, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3622, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0916, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0932, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0703, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2863, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7295, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1445, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2664, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1228, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5749, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3287, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0723, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5876, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0168, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1067, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0471, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0554, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0340, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1265, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4648, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1436, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9249304911955515, 'B-C': 0.43134087237479807, 'I-C': 0.49553229512381924, 'B-P': 0.6999117387466902, 'I-P': 0.8859805318138652, 'B-MC': 0.3973333333333333, 'I-MC': 0.5162371673999581}, 'recall': {'O': 0.8960718294051627, 'B-C': 0.5881057268722467, 'I-C': 0.5813117699910153, 'B-P': 0.6865800865800866, 'I-P': 0.7631390593047035, 'B-MC': 0.6898148148148148, 'I-MC': 0.7943262411347518}, 'f1': {'O': 0.9102724888838217, 'B-C': 0.4976700838769805, 'I-C': 0.5350055126791621, 'B-P': 0.6931818181818182, 'I-P': 0.8199846187651065, 'B-MC': 0.5042301184433164, 'I-MC': 0.6257777777777778}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7738154053447114, 'recall': 0.7738154053447114, 'f1': 0.7738154053447114, 'support': None}, 'macro_avg': {'precision': 0.6216094899982879, 'recall': 0.7141927897289688, 'f1': 0.6551603455154262, 'support': None}, 'weighted_avg': {'precision': 0.8015283716939111, 'recall': 0.7738154053447114, 'f1': 0.7827105627886767, 'support': None}}
			------------EPOCH 15---------------
Loss:  tensor(0.1005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0709, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1932, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3049, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5632, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1103, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0327, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0529, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0441, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0565, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2445, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0476, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5747, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0451, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2814, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0380, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5441, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3182, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0594, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1114, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5814, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0053, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0734, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0382, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0666, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0605, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0312, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1362, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3982, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1229, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9010362694300518, 'B-C': 0.4625267665952891, 'I-C': 0.5540955631399317, 'B-P': 0.7216238608119304, 'I-P': 0.8520606930490056, 'B-MC': 0.4915254237288136, 'I-MC': 0.6054846592451806}, 'recall': {'O': 0.9108118219229331, 'B-C': 0.47577092511013214, 'I-C': 0.4862234201856843, 'B-P': 0.7541125541125541, 'I-P': 0.8497955010224949, 'B-MC': 0.6712962962962963, 'I-MC': 0.7188910380399742}, 'f1': {'O': 0.9058976744186046, 'B-C': 0.46905537459283386, 'I-C': 0.5179454458446322, 'B-P': 0.7375105842506351, 'I-P': 0.8509265895361934, 'B-MC': 0.5675146771037183, 'I-MC': 0.6573323507737657}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7973051875140355, 'recall': 0.7973051875140355, 'f1': 0.7973051875140355, 'support': None}, 'macro_avg': {'precision': 0.6554790337143146, 'recall': 0.6952716509557241, 'f1': 0.6723118137886263, 'support': None}, 'weighted_avg': {'precision': 0.7957950299437607, 'recall': 0.7973051875140355, 'f1': 0.7957936264206671, 'support': None}}
			------------EPOCH 16---------------
Loss:  tensor(0.0538, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0361, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0268, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2403, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0357, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4568, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0284, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0663, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0914, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1071, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0744, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0619, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2374, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0278, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4714, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2364, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0196, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0397, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5310, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0568, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0411, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0216, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0860, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0451, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4330, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1265, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9356608875457293, 'B-C': 0.4857142857142857, 'I-C': 0.5701081612586038, 'B-P': 0.6552207428170989, 'I-P': 0.8091654956710937, 'B-MC': 0.5701357466063348, 'I-MC': 0.7297592997811816}, 'recall': {'O': 0.8802843247287692, 'B-C': 0.44933920704845814, 'I-C': 0.4341120095837077, 'B-P': 0.8095238095238095, 'I-P': 0.9126278118609407, 'B-MC': 0.5833333333333334, 'I-MC': 0.6450676982591876}, 'f1': {'O': 0.9071282624619298, 'B-C': 0.4668192219679634, 'I-C': 0.49290147071325346, 'B-P': 0.7242447714949651, 'I-P': 0.8577881357968333, 'B-MC': 0.5766590389016019, 'I-MC': 0.6848049281314169}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8035257130024702, 'recall': 0.8035257130024702, 'f1': 0.8035257130024702, 'support': None}, 'macro_avg': {'precision': 0.6793949456277611, 'recall': 0.6734697420483151, 'f1': 0.6729065470668519, 'support': None}, 'weighted_avg': {'precision': 0.7972989910423636, 'recall': 0.8035257130024702, 'f1': 0.797012420002906, 'support': None}}
			------------EPOCH 17---------------
Loss:  tensor(0.0978, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1414, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0205, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2284, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4224, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0182, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2609, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0425, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0633, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2125, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0700, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5781, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2400, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0524, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4858, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0140, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0424, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5259, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0466, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0277, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0068, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0499, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0517, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1195, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4302, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1142, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9167304454308893, 'B-C': 0.5387755102040817, 'I-C': 0.6048845767815323, 'B-P': 0.6556156968876861, 'I-P': 0.7766095047356782, 'B-MC': 0.6108374384236454, 'I-MC': 0.7490317583268784}, 'recall': {'O': 0.8962214739992518, 'B-C': 0.2907488986784141, 'I-C': 0.27073974243785565, 'B-P': 0.8389610389610389, 'I-P': 0.9515848670756646, 'B-MC': 0.5740740740740741, 'I-MC': 0.6234687298517085}, 'f1': {'O': 0.906359956112141, 'B-C': 0.37768240343347637, 'I-C': 0.37405606703217126, 'B-P': 0.7360425370300038, 'I-P': 0.85523927677074, 'B-MC': 0.5918854415274463, 'I-MC': 0.6805066854327938}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7985178531327195, 'recall': 0.7985178531327195, 'f1': 0.7985178531327195, 'support': None}, 'macro_avg': {'precision': 0.6932121329700559, 'recall': 0.6351141178682868, 'f1': 0.6459674810483961, 'support': None}, 'weighted_avg': {'precision': 0.7846234253790296, 'recall': 0.7985178531327195, 'f1': 0.7770110805687444, 'support': None}}
			------------EPOCH 18---------------
Loss:  tensor(0.1659, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1822, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0596, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3611, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4135, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0165, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2553, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0176, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0279, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0389, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1794, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0534, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5308, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0795, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2918, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2064, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5622, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2621, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0506, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0571, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5336, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0671, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0278, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0293, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0536, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0918, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7517, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1024, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9045872932724688, 'B-C': 0.48188405797101447, 'I-C': 0.5560152768936982, 'B-P': 0.6355876559422193, 'I-P': 0.7761488700327649, 'B-MC': 0.6096256684491979, 'I-MC': 0.7796686072548141}, 'recall': {'O': 0.9044519266741489, 'B-C': 0.29295154185022027, 'I-C': 0.2616052710392333, 'B-P': 0.8380952380952381, 'I-P': 0.9446319018404908, 'B-MC': 0.5277777777777778, 'I-MC': 0.561250805931657}, 'f1': {'O': 0.90451960490871, 'B-C': 0.3643835616438356, 'I-C': 0.3558044806517312, 'B-P': 0.7229275578790142, 'I-P': 0.8521422312410645, 'B-MC': 0.56575682382134, 'I-MC': 0.6526710402999062}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7920053896249719, 'recall': 0.7920053896249719, 'f1': 0.7920053896249719, 'support': None}, 'macro_avg': {'precision': 0.6776453471165969, 'recall': 0.6186806376012522, 'f1': 0.631172185777943, 'support': None}, 'weighted_avg': {'precision': 0.774476563228051, 'recall': 0.7920053896249719, 'f1': 0.7698196584721462, 'support': None}}
			------------EPOCH 19---------------
Loss:  tensor(0.2045, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1834, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1591, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3282, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0467, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4507, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0293, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2761, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0198, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0252, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0280, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1379, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0784, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5094, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0476, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0854, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5404, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2638, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3050, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1549, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5759, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0565, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0285, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0110, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0318, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0137, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1048, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5188, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1244, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8770660424203731, 'B-C': 0.5113350125944585, 'I-C': 0.6120077469335055, 'B-P': 0.6654727793696275, 'I-P': 0.8365962768771946, 'B-MC': 0.4744525547445255, 'I-MC': 0.678839590443686}, 'recall': {'O': 0.9251028806584363, 'B-C': 0.44713656387665196, 'I-C': 0.42587601078167114, 'B-P': 0.8043290043290043, 'I-P': 0.8891615541922291, 'B-MC': 0.6018518518518519, 'I-MC': 0.6411992263056093}, 'f1': {'O': 0.9004442502366907, 'B-C': 0.4770857814336075, 'I-C': 0.502251655629139, 'B-P': 0.7283418267346139, 'I-P': 0.8620783662544301, 'B-MC': 0.5306122448979592, 'I-MC': 0.6594827586206896}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8050976869526162, 'recall': 0.8050976869526162, 'f1': 0.8050976869526162, 'support': None}, 'macro_avg': {'precision': 0.6651100004833387, 'recall': 0.6763795845707792, 'f1': 0.665756697686733, 'support': None}, 'weighted_avg': {'precision': 0.7945611471807569, 'recall': 0.8050976869526162, 'f1': 0.7965166476853826, 'support': None}}
			------------EPOCH 20---------------
Loss:  tensor(0.0238, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0626, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1992, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0782, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0603, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2976, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0711, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1404, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0867, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1269, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4706, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0156, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4529, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2210, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0242, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0360, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4919, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0601, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0366, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0379, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0210, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3899, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0906, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8609467455621301, 'B-C': 0.43206106870229005, 'I-C': 0.5047503045066991, 'B-P': 0.7018800358102059, 'I-P': 0.904550431926025, 'B-MC': 0.42618384401114207, 'I-MC': 0.6419127988748242}, 'recall': {'O': 0.9144781144781144, 'B-C': 0.6233480176211453, 'I-C': 0.6205450733752621, 'B-P': 0.6787878787878788, 'I-P': 0.7601738241308793, 'B-MC': 0.7083333333333334, 'I-MC': 0.7356544165054804}, 'f1': {'O': 0.8869054098182214, 'B-C': 0.5103697024346258, 'I-C': 0.5566899516389038, 'B-P': 0.6901408450704226, 'I-P': 0.82610145008056, 'B-MC': 0.5321739130434783, 'I-MC': 0.6855941114616193}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7800808443745789, 'recall': 0.7800808443745789, 'f1': 0.7800808443745789, 'support': None}, 'macro_avg': {'precision': 0.638897889913331, 'recall': 0.7201886654617277, 'f1': 0.6697107690782615, 'support': None}, 'weighted_avg': {'precision': 0.8008169462244418, 'recall': 0.7800808443745789, 'f1': 0.7859890788855576, 'support': None}}
			------------EPOCH 21---------------
Loss:  tensor(0.0496, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0218, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0280, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2449, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0411, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5840, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0342, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2756, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0766, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0371, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0184, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0306, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1045, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0156, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4429, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0124, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0122, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4308, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0175, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0298, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5788, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0242, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0133, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0090, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1177, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3856, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0942, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9147105441920145, 'B-C': 0.504, 'I-C': 0.5858178887993554, 'B-P': 0.7315270935960592, 'I-P': 0.8673000306466442, 'B-MC': 0.4785714285714286, 'I-MC': 0.6022857142857143}, 'recall': {'O': 0.906771417882529, 'B-C': 0.5550660792951542, 'I-C': 0.544324648098233, 'B-P': 0.7714285714285715, 'I-P': 0.8680981595092024, 'B-MC': 0.6203703703703703, 'I-MC': 0.6795615731785944}, 'f1': {'O': 0.9107236792665515, 'B-C': 0.5283018867924529, 'I-C': 0.5643095552278197, 'B-P': 0.7509481668773705, 'I-P': 0.8676989115437682, 'B-MC': 0.5403225806451613, 'I-MC': 0.6385943653438352}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8111161015046037, 'recall': 0.8111161015046037, 'f1': 0.8111161015046037, 'support': None}, 'macro_avg': {'precision': 0.6691732428701737, 'recall': 0.7065172599660935, 'f1': 0.6858427350995656, 'support': None}, 'weighted_avg': {'precision': 0.8117444058245203, 'recall': 0.8111161015046037, 'f1': 0.8110778235137662, 'support': None}}
			------------EPOCH 22---------------
Loss:  tensor(0.0122, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1670, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0143, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3734, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2264, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0204, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0211, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0154, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4847, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0173, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1319, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0156, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4428, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1842, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0063, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0180, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4430, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0476, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0147, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0755, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3562, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0904, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9418842596382091, 'B-C': 0.4525939177101968, 'I-C': 0.5080966286169366, 'B-P': 0.7213254035683943, 'I-P': 0.8483913395386483, 'B-MC': 0.5665236051502146, 'I-MC': 0.6896778435239974}, 'recall': {'O': 0.8609801720912832, 'B-C': 0.5572687224669604, 'I-C': 0.573225516621743, 'B-P': 0.7350649350649351, 'I-P': 0.8574130879345603, 'B-MC': 0.6111111111111112, 'I-MC': 0.6763378465506125}, 'f1': {'O': 0.8996169181455712, 'B-C': 0.4995064165844028, 'I-C': 0.5386996904024768, 'B-P': 0.7281303602058319, 'I-P': 0.8528783563873068, 'B-MC': 0.5879732739420936, 'I-MC': 0.6829427083333334}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.795823040646755, 'recall': 0.795823040646755, 'f1': 0.795823040646755, 'support': None}, 'macro_avg': {'precision': 0.6754989996780854, 'recall': 0.6959144845487436, 'f1': 0.6842496748572879, 'support': None}, 'weighted_avg': {'precision': 0.8056646141125161, 'recall': 0.795823040646755, 'f1': 0.7998287510003945, 'support': None}}
			------------EPOCH 23---------------
Loss:  tensor(0.0200, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1970, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0152, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4314, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2495, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0118, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0110, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0153, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0975, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0053, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4391, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1217, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0074, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4142, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1632, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0173, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4359, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0123, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0059, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0657, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3229, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0921, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8988896242370763, 'B-C': 0.4721115537848606, 'I-C': 0.5776986951364176, 'B-P': 0.6551976573938506, 'I-P': 0.8485798139581157, 'B-MC': 0.531496062992126, 'I-MC': 0.7122994652406417}, 'recall': {'O': 0.9146277590722035, 'B-C': 0.5220264317180616, 'I-C': 0.510482180293501, 'B-P': 0.7748917748917749, 'I-P': 0.8721370143149284, 'B-MC': 0.625, 'I-MC': 0.6441005802707931}, 'f1': {'O': 0.9066904020175048, 'B-C': 0.49581589958158995, 'I-C': 0.5420144685587089, 'B-P': 0.7100357001190003, 'I-P': 0.8601971610821169, 'B-MC': 0.574468085106383, 'I-MC': 0.6764855256475368}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8074781046485515, 'recall': 0.8074781046485515, 'f1': 0.8074781046485515, 'support': None}, 'macro_avg': {'precision': 0.670896124677584, 'recall': 0.6947522486516089, 'f1': 0.6808153203018344, 'support': None}, 'weighted_avg': {'precision': 0.8031709163611331, 'recall': 0.8074781046485515, 'f1': 0.8046413917303916, 'support': None}}
			------------EPOCH 24---------------
Loss:  tensor(0.0129, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0041, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1503, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0115, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3574, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0065, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2142, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0134, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0875, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4221, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4016, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0332, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0016, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0675, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3293, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0812, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9171182172021675, 'B-C': 0.5010309278350515, 'I-C': 0.5841907561652785, 'B-P': 0.7308641975308642, 'I-P': 0.8624263361105466, 'B-MC': 0.4791666666666667, 'I-MC': 0.6227973568281938}, 'recall': {'O': 0.8991395435839881, 'B-C': 0.5352422907488987, 'I-C': 0.5356394129979035, 'B-P': 0.7688311688311689, 'I-P': 0.8678936605316974, 'B-MC': 0.6388888888888888, 'I-MC': 0.7292069632495164}, 'f1': {'O': 0.9080398972343963, 'B-C': 0.5175718849840256, 'I-C': 0.5588625888602453, 'B-P': 0.7493670886075949, 'I-P': 0.8651513607175619, 'B-MC': 0.5476190476190476, 'I-MC': 0.6718146718146718}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.810711879631709, 'recall': 0.810711879631709, 'f1': 0.810711879631709, 'support': None}, 'macro_avg': {'precision': 0.6710849226198242, 'recall': 0.7106917041188661, 'f1': 0.6883466485482206, 'support': None}, 'weighted_avg': {'precision': 0.8114665072348374, 'recall': 0.810711879631709, 'f1': 0.8105355863454139, 'support': None}}
			------------EPOCH 25---------------
Loss:  tensor(0.0084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1364, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0091, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3168, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0064, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1731, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0868, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4467, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0102, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0922, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3617, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1355, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3791, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0409, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0094, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0410, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2824, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0896, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9192746193281812, 'B-C': 0.49355432780847147, 'I-C': 0.5686331352369909, 'B-P': 0.7407732864674869, 'I-P': 0.8810807895588394, 'B-MC': 0.4608150470219436, 'I-MC': 0.5689054726368159}, 'recall': {'O': 0.8989150766928544, 'B-C': 0.5903083700440529, 'I-C': 0.5874513327343516, 'B-P': 0.7298701298701299, 'I-P': 0.8352249488752557, 'B-MC': 0.6805555555555556, 'I-MC': 0.7372662798194713}, 'f1': {'O': 0.9089808579859271, 'B-C': 0.5376128385155466, 'I-C': 0.5778890771157104, 'B-P': 0.7352812908853031, 'I-P': 0.8575402865991286, 'B-MC': 0.5495327102803739, 'I-MC': 0.6422353271552934}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8043790702896924, 'recall': 0.8043790702896924, 'f1': 0.8043790702896924, 'support': None}, 'macro_avg': {'precision': 0.6618623825798186, 'recall': 0.7227988133702388, 'f1': 0.6870103412196119, 'support': None}, 'weighted_avg': {'precision': 0.8143122669026681, 'recall': 0.8043790702896924, 'f1': 0.8081158679486738, 'support': None}}
			------------EPOCH 26---------------
Loss:  tensor(0.0117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0952, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0091, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3430, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1843, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0127, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0702, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0965, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3350, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1311, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0096, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3721, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0408, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2798, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0709, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9089062735068452, 'B-C': 0.44276094276094274, 'I-C': 0.5437859044416421, 'B-P': 0.7117117117117117, 'I-P': 0.8689965362146513, 'B-MC': 0.5387755102040817, 'I-MC': 0.7184718100890207}, 'recall': {'O': 0.9040778151889263, 'B-C': 0.579295154185022, 'I-C': 0.581162024558251, 'B-P': 0.7523809523809524, 'I-P': 0.8593558282208589, 'B-MC': 0.6111111111111112, 'I-MC': 0.6244358478401032}, 'f1': {'O': 0.9064856146142016, 'B-C': 0.5019083969465649, 'I-C': 0.5618530582699963, 'B-P': 0.7314814814814815, 'I-P': 0.8641492943988894, 'B-MC': 0.5726681127982647, 'I-MC': 0.6681614349775785}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8078598697507299, 'recall': 0.8078598697507299, 'f1': 0.8078598697507299, 'support': None}, 'macro_avg': {'precision': 0.6762012412755565, 'recall': 0.7016883904978892, 'f1': 0.6866724847838539, 'support': None}, 'weighted_avg': {'precision': 0.8116914984006193, 'recall': 0.8078598697507299, 'f1': 0.80932081221374, 'support': None}}
			------------EPOCH 27---------------
Loss:  tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1294, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0085, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2835, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1485, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0121, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0488, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3881, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0046, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0936, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3423, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1252, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0104, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3446, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0396, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0064, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0367, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2688, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0634, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9278744828662868, 'B-C': 0.4748062015503876, 'I-C': 0.5598834534580586, 'B-P': 0.7113237639553429, 'I-P': 0.8439422044322952, 'B-MC': 0.5911111111111111, 'I-MC': 0.7438351122561649}, 'recall': {'O': 0.8894126449682005, 'B-C': 0.539647577092511, 'I-C': 0.5467205750224619, 'B-P': 0.7722943722943723, 'I-P': 0.8838957055214723, 'B-MC': 0.6157407407407407, 'I-MC': 0.6515151515151515}, 'f1': {'O': 0.9082365525672372, 'B-C': 0.5051546391752577, 'I-C': 0.5532237290703841, 'B-P': 0.7405562474055626, 'I-P': 0.8634570244219147, 'B-MC': 0.6031746031746031, 'I-MC': 0.6946210689121842}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8110936447338872, 'recall': 0.8110936447338872, 'f1': 0.8110936447338872, 'support': None}, 'macro_avg': {'precision': 0.693253761375664, 'recall': 0.6998895381649871, 'f1': 0.6954891235324491, 'support': None}, 'weighted_avg': {'precision': 0.8111307245786539, 'recall': 0.8110936447338872, 'f1': 0.8105077908707861, 'support': None}}
			------------EPOCH 28---------------
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0898, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2854, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1560, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0067, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0121, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3309, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0981, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2597, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0774, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0083, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3152, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0321, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0054, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0385, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2517, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0584, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8986611435757083, 'B-C': 0.4861111111111111, 'I-C': 0.5773261352810416, 'B-P': 0.7232289950576606, 'I-P': 0.8709694168959967, 'B-MC': 0.47651006711409394, 'I-MC': 0.6463059918557301}, 'recall': {'O': 0.9090160867938646, 'B-C': 0.539647577092511, 'I-C': 0.5444743935309974, 'B-P': 0.7601731601731602, 'I-P': 0.8575664621676892, 'B-MC': 0.6574074074074074, 'I-MC': 0.7163120567375887}, 'f1': {'O': 0.9038089570004464, 'B-C': 0.511482254697286, 'I-C': 0.5604192355117139, 'B-P': 0.7412410299704516, 'I-P': 0.8642159767124348, 'B-MC': 0.5525291828793774, 'I-MC': 0.6795107033639144}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8094767572423086, 'recall': 0.8094767572423086, 'f1': 0.8094767572423086, 'support': None}, 'macro_avg': {'precision': 0.6684446944130489, 'recall': 0.7120853062718885, 'f1': 0.6876010485908035, 'support': None}, 'weighted_avg': {'precision': 0.8099246172187513, 'recall': 0.8094767572423086, 'f1': 0.8093753808250318, 'support': None}}
			------------EPOCH 29---------------
Loss:  tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0055, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2309, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1239, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0055, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0392, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2944, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0997, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1161, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0091, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3260, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0266, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0049, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0686, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9118869492934331, 'B-C': 0.48007246376811596, 'I-C': 0.5561869583747692, 'B-P': 0.7291666666666666, 'I-P': 0.8833568866181106, 'B-MC': 0.4752475247524752, 'I-MC': 0.5930443810018168}, 'recall': {'O': 0.902880658436214, 'B-C': 0.5837004405286343, 'I-C': 0.5862533692722371, 'B-P': 0.7272727272727273, 'I-P': 0.830879345603272, 'B-MC': 0.6666666666666666, 'I-MC': 0.7366215344938749}, 'f1': {'O': 0.9073614557485524, 'B-C': 0.5268389662027834, 'I-C': 0.5708245243128963, 'B-P': 0.7282184655396619, 'I-P': 0.8563148743347911, 'B-MC': 0.5549132947976878, 'I-MC': 0.6570812365204889}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8032337749831574, 'recall': 0.8032337749831574, 'f1': 0.8032337749831574, 'support': None}, 'macro_avg': {'precision': 0.6612802614964839, 'recall': 0.7191821060390896, 'f1': 0.6859361167795518, 'support': None}, 'weighted_avg': {'precision': 0.8125412939894133, 'recall': 0.8032337749831574, 'f1': 0.806799359019687, 'support': None}}
			------------EPOCH 30---------------
Loss:  tensor(0.0052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0046, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2313, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1222, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0053, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0404, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3551, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1087, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2846, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0807, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3098, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0003, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0259, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0309, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2325, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0574, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9321606058215666, 'B-C': 0.41930379746835444, 'I-C': 0.5114329268292683, 'B-P': 0.7180762852404643, 'I-P': 0.861388688575255, 'B-MC': 0.5906976744186047, 'I-MC': 0.745425616547335}, 'recall': {'O': 0.8841750841750842, 'B-C': 0.5837004405286343, 'I-C': 0.6028751123090745, 'B-P': 0.7497835497835498, 'I-P': 0.854959100204499, 'B-MC': 0.5879629629629629, 'I-MC': 0.6041263700838169}, 'f1': {'O': 0.9075339835650104, 'B-C': 0.4880294659300184, 'I-C': 0.5534020618556701, 'B-P': 0.7335874629394324, 'I-P': 0.8581618514907375, 'B-MC': 0.5893271461716937, 'I-MC': 0.6673789173789174}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8016618010330114, 'recall': 0.8016618010330114, 'f1': 0.8016618010330114, 'support': None}, 'macro_avg': {'precision': 0.6826407992715497, 'recall': 0.6953689457210889, 'f1': 0.6853458413330685, 'support': None}, 'weighted_avg': {'precision': 0.8125327043433797, 'recall': 0.8016618010330114, 'f1': 0.8056775081313072, 'support': None}}
Tokenizer: ../arg_m/arg_mining/smlm_pretrained_iter5_0/tokenizer Model: ../arg_m/arg_mining/smlm_pretrained_iter5_0/model
	Data split: fullData
			------------EPOCH 1---------------
Loss:  tensor(4.8087, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.9399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.3711, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.3889, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(4.0503, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.7895, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.7212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.5237, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.6037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.7206, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.4102, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.5936, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.9846, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0444, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.6822, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0312, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.2153, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0921, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.8424, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(3.0041, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.9085, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4591, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4191, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1176, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8259, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9463, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3359, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0761, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4251, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7532, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5622, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.7971092800216129, 'B-C': 0.0, 'I-C': 0.0, 'B-P': 0.4148991205380238, 'I-P': 0.6594221150732251, 'B-MC': 0.0, 'I-MC': 0.0}, 'recall': {'O': 0.8830527497194164, 'B-C': 0.0, 'I-C': 0.0, 'B-P': 0.6943722943722944, 'I-P': 0.9369120654396728, 'B-MC': 0.0, 'I-MC': 0.0}, 'f1': {'O': 0.8378829292534876, 'B-C': 0.0, 'I-C': 0.0, 'B-P': 0.5194300518134716, 'I-P': 0.7740491225106123, 'B-MC': 0.0, 'I-MC': 0.0}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.6945879182573546, 'recall': 0.6945879182573546, 'f1': 0.6945879182573546, 'support': None}, 'macro_avg': {'precision': 0.26734721651898025, 'recall': 0.35919101564734046, 'f1': 0.30448030051108166, 'support': None}, 'weighted_avg': {'precision': 0.5396557507869426, 'recall': 0.6945879182573546, 'f1': 0.6049550392011004, 'support': None}}
			------------EPOCH 2---------------
Loss:  tensor(2.2830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.2902, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3849, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.2391, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.2543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.2224, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1710, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.4230, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1706, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.3277, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0353, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0092, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.0729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.2590, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9174, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9472, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(2.1629, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.9143, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3265, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6250, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4151, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6938, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7985, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8393, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0847, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3143, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4826, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8846535030875963, 'B-C': 0.19672131147540983, 'I-C': 0.23721275018532245, 'B-P': 0.5021008403361344, 'I-P': 0.6853310857854813, 'B-MC': 0.31343283582089554, 'I-MC': 0.4941348973607038}, 'recall': {'O': 0.8682379349046015, 'B-C': 0.05286343612334802, 'I-C': 0.04791853848457622, 'B-P': 0.8277056277056277, 'I-P': 0.9561349693251534, 'B-MC': 0.09722222222222222, 'I-MC': 0.10863958736299162}, 'f1': {'O': 0.8763688543161392, 'B-C': 0.08333333333333334, 'I-C': 0.07973090818487605, 'B-P': 0.625040863027133, 'I-P': 0.7983948430062542, 'B-MC': 0.1484098939929329, 'I-MC': 0.17811839323467232}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7178082191780822, 'recall': 0.7178082191780822, 'f1': 0.7178082191780822, 'support': None}, 'macro_avg': {'precision': 0.4733696034359348, 'recall': 0.4226746165897887, 'f1': 0.39848529844219166, 'support': None}, 'weighted_avg': {'precision': 0.6530950537304202, 'recall': 0.7178082191780822, 'f1': 0.6558735952176563, 'support': None}}
			------------EPOCH 3---------------
Loss:  tensor(1.7373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8520, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7227, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6000, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6160, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7095, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6240, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8806, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5745, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.8596, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5765, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5607, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6687, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4175, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.7332, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4579, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5711, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6933, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4382, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4062, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5806, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2273, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0216, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6393, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8548, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0118, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3418, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9075442648190916, 'B-C': 0.3672316384180791, 'I-C': 0.41849148418491483, 'B-P': 0.5802469135802469, 'I-P': 0.7169775553533515, 'B-MC': 0.5772357723577236, 'I-MC': 0.7707581227436823}, 'recall': {'O': 0.8820800598578377, 'B-C': 0.14317180616740088, 'I-C': 0.1287810721772986, 'B-P': 0.8545454545454545, 'I-P': 0.9668200408997955, 'B-MC': 0.3287037037037037, 'I-MC': 0.2753062540296583}, 'f1': {'O': 0.8946309998102827, 'B-C': 0.2060221870047544, 'I-C': 0.19695408221687852, 'B-P': 0.6911764705882354, 'I-P': 0.82336293974225, 'B-MC': 0.41887905604719766, 'I-MC': 0.40570071258907364}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7531327195149338, 'recall': 0.7531327195149338, 'f1': 0.7531327195149339, 'support': None}, 'macro_avg': {'precision': 0.6197836787795843, 'recall': 0.5113440559115927, 'f1': 0.519532349714096, 'support': None}, 'weighted_avg': {'precision': 0.7253667007862731, 'recall': 0.7531327195149338, 'f1': 0.7100333743061806, 'support': None}}
			------------EPOCH 4---------------
Loss:  tensor(1.3329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4347, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3924, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1982, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4818, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2646, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4951, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1692, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1613, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.5081, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1960, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4867, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1376, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1288, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2912, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2221, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0838, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0863, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.4641, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0009, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3913, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0783, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4997, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6854, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0995, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3772, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8500239578342118, 'B-C': 0.40574506283662476, 'I-C': 0.5192761605035405, 'B-P': 0.6694915254237288, 'I-P': 0.8613776924717423, 'B-MC': 0.43197278911564624, 'I-MC': 0.6418639609169485}, 'recall': {'O': 0.9291432846988402, 'B-C': 0.4977973568281938, 'I-C': 0.4941599281221923, 'B-P': 0.7523809523809524, 'I-P': 0.8259713701431493, 'B-MC': 0.5879629629629629, 'I-MC': 0.5506125080593166}, 'f1': {'O': 0.8878244083792093, 'B-C': 0.447082096933729, 'I-C': 0.5064068134734904, 'B-P': 0.7085201793721974, 'I-P': 0.8433030587744023, 'B-MC': 0.49803921568627446, 'I-MC': 0.5927468332465731}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7815854480125758, 'recall': 0.7815854480125758, 'f1': 0.7815854480125758, 'support': None}, 'macro_avg': {'precision': 0.6256787355860632, 'recall': 0.6625754804565153, 'f1': 0.6405603722665537, 'support': None}, 'weighted_avg': {'precision': 0.7796695073130417, 'recall': 0.7815854480125758, 'f1': 0.779478081618983, 'support': None}}
			------------EPOCH 5---------------
Loss:  tensor(1.0407, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0097, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0452, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1134, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8559, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2137, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9798, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2518, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0744, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1887, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9080, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1148, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0420, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9542, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.2690, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9640, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1360, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9267, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.3150, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9258, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9871, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1977, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1520, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6699, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5770, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3641, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8647, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3974, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9053, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2568, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9235109717868338, 'B-C': 0.4159159159159159, 'I-C': 0.4863179771388985, 'B-P': 0.7258064516129032, 'I-P': 0.866000634316524, 'B-MC': 0.5922330097087378, 'I-MC': 0.7711349836524989}, 'recall': {'O': 0.8817059483726151, 'B-C': 0.6101321585903083, 'I-C': 0.6307277628032345, 'B-P': 0.7402597402597403, 'I-P': 0.8375766871165644, 'B-MC': 0.5648148148148148, 'I-MC': 0.5322372662798195}, 'f1': {'O': 0.9021244019138757, 'B-C': 0.4946428571428572, 'I-C': 0.5491883434382946, 'B-P': 0.732961851693099, 'I-P': 0.8515515359426166, 'B-MC': 0.5781990521327014, 'I-MC': 0.6297921037573908}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7923646979564338, 'recall': 0.7923646979564338, 'f1': 0.7923646979564338, 'support': None}, 'macro_avg': {'precision': 0.6829885634474732, 'recall': 0.6853506254624424, 'f1': 0.6769228780029763, 'support': None}, 'weighted_avg': {'precision': 0.8101604159739877, 'recall': 0.7923646979564338, 'f1': 0.797897267663641, 'support': None}}
			------------EPOCH 6---------------
Loss:  tensor(0.7702, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7395, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7595, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8643, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6187, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0731, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9791, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7312, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6991, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0994, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7674, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8246, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7125, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9494, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7585, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1436, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6944, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5193, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2441, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6261, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8752, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2541, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8828919835485965, 'B-C': 0.30863039399624764, 'I-C': 0.3487639847256649, 'B-P': 0.7597122302158273, 'I-P': 0.9622641509433962, 'B-MC': 0.4620253164556962, 'I-MC': 0.6215657311669128}, 'recall': {'O': 0.9155256266367378, 'B-C': 0.724669603524229, 'I-C': 0.7795747229709494, 'B-P': 0.45714285714285713, 'I-P': 0.5058282208588957, 'B-MC': 0.6759259259259259, 'I-MC': 0.6782720825274017}, 'f1': {'O': 0.8989127240669996, 'B-C': 0.4328947368421053, 'I-C': 0.4819254802129137, 'B-P': 0.5708108108108108, 'I-P': 0.6630922860398095, 'B-MC': 0.5488721804511278, 'I-MC': 0.648681979343302}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.6836514709184819, 'recall': 0.6836514709184819, 'f1': 0.6836514709184819, 'support': None}, 'macro_avg': {'precision': 0.6208362558646202, 'recall': 0.6767055770838566, 'f1': 0.6064557425381526, 'support': None}, 'weighted_avg': {'precision': 0.8083598525440283, 'recall': 0.6836514709184819, 'f1': 0.7004028883195402, 'support': None}}
			------------EPOCH 7---------------
Loss:  tensor(1.1656, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0604, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8274, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7350, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4885, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9324, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4866, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8734, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8572, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9638, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6617, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6397, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9522, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.6305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0663, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9767, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9616, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.1088, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0813, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4299, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3540, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2180, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5015, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2976, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4863, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7835, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2168, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8929342565706202, 'B-C': 0.4431818181818182, 'I-C': 0.48952649181482133, 'B-P': 0.6338400528750826, 'I-P': 0.7924709162999434, 'B-MC': 0.8235294117647058, 'I-MC': 0.9654427645788337}, 'recall': {'O': 0.8973438084549196, 'B-C': 0.42951541850220265, 'I-C': 0.4164420485175202, 'B-P': 0.8303030303030303, 'I-P': 0.9298568507157464, 'B-MC': 0.19444444444444445, 'I-MC': 0.14410058027079303}, 'f1': {'O': 0.8951336020301538, 'B-C': 0.436241610738255, 'I-C': 0.45003641071284084, 'B-P': 0.7188905547226387, 'I-P': 0.8556844110935993, 'B-MC': 0.3146067415730337, 'I-MC': 0.25077138849929875}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.777116550640018, 'recall': 0.777116550640018, 'f1': 0.7771165506400181, 'support': None}, 'macro_avg': {'precision': 0.7201322445836892, 'recall': 0.548858025886951, 'f1': 0.5601949599099744, 'support': None}, 'weighted_avg': {'precision': 0.7817163932050885, 'recall': 0.777116550640018, 'f1': 0.7541030992173495, 'support': None}}
			------------EPOCH 8---------------
Loss:  tensor(0.8902, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9503, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7623, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9877, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(1.0089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4579, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8649, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5272, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4138, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4973, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5318, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4940, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8296, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4212, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4401, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8640, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5636, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3771, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5282, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9159, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0557, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2772, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1569, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3560, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1935, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3359, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5500, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1645, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.888315276666909, 'B-C': 0.41569767441860467, 'I-C': 0.4893042107633416, 'B-P': 0.6913477537437605, 'I-P': 0.8690707350901525, 'B-MC': 0.6593406593406593, 'I-MC': 0.8759733036707452}, 'recall': {'O': 0.9141040029928919, 'B-C': 0.6299559471365639, 'I-C': 0.6507936507936508, 'B-P': 0.7194805194805195, 'I-P': 0.8008691206543967, 'B-MC': 0.5555555555555556, 'I-MC': 0.5077369439071566}, 'f1': {'O': 0.9010251493472969, 'B-C': 0.500875656742557, 'I-C': 0.5586118251928021, 'B-P': 0.7051336444633007, 'I-P': 0.8335772249567647, 'B-MC': 0.6030150753768844, 'I-MC': 0.6428571428571428}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7868852459016393, 'recall': 0.7868852459016393, 'f1': 0.7868852459016392, 'support': None}, 'macro_avg': {'precision': 0.6984356590991675, 'recall': 0.6826422486458192, 'f1': 0.677870816990964, 'support': None}, 'weighted_avg': {'precision': 0.8081260036078721, 'recall': 0.7868852459016393, 'f1': 0.7914574990064893, 'support': None}}
			------------EPOCH 9---------------
Loss:  tensor(0.3431, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3362, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3750, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4624, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3396, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7611, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2788, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2970, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2683, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4198, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3818, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.9441, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2998, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2476, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4397, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2618, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3821, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.8366, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0282, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2522, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2638, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2925, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1619, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2438, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5136, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1706, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9008270620669101, 'B-C': 0.4878048780487805, 'I-C': 0.5867670364500792, 'B-P': 0.6808988764044944, 'I-P': 0.8247979703063334, 'B-MC': 0.5875, 'I-MC': 0.7342406876790831}, 'recall': {'O': 0.9046015712682379, 'B-C': 0.44052863436123346, 'I-C': 0.44354597184785866, 'B-P': 0.787012987012987, 'I-P': 0.8974948875255624, 'B-MC': 0.6527777777777778, 'I-MC': 0.6608639587362991}, 'f1': {'O': 0.902710371089375, 'B-C': 0.46296296296296297, 'I-C': 0.5052021149582124, 'B-P': 0.730120481927711, 'I-P': 0.8596121829399668, 'B-MC': 0.6184210526315789, 'I-MC': 0.6956226671191041}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.806355266112733, 'recall': 0.806355266112733, 'f1': 0.806355266112733, 'support': None}, 'macro_avg': {'precision': 0.6861195015650973, 'recall': 0.6838322555042795, 'f1': 0.6820931190898445, 'support': None}, 'weighted_avg': {'precision': 0.7972928022275234, 'recall': 0.806355266112733, 'f1': 0.7994015929528354, 'support': None}}
			------------EPOCH 10---------------
Loss:  tensor(0.2406, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2140, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2381, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4276, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2188, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5981, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1765, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6077, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2694, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2167, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2438, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3188, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1954, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1726, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4231, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6482, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4409, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2087, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0202, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1931, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0869, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0621, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1822, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1094, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1524, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5660, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1912, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8544551685550427, 'B-C': 0.395, 'I-C': 0.46436951957515127, 'B-P': 0.7573839662447257, 'I-P': 0.9139869281045752, 'B-MC': 0.4019370460048426, 'I-MC': 0.5311858842839556}, 'recall': {'O': 0.9141040029928919, 'B-C': 0.5220264317180616, 'I-C': 0.5630428271937706, 'B-P': 0.6216450216450217, 'I-P': 0.7149284253578733, 'B-MC': 0.7685185185185185, 'I-MC': 0.8346228239845261}, 'f1': {'O': 0.883273686874164, 'B-C': 0.4497153700189753, 'I-C': 0.5089678510998308, 'B-P': 0.6828340466000952, 'I-P': 0.8022948938611589, 'B-MC': 0.5278219395866455, 'I-MC': 0.649197592778335}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7561419267909275, 'recall': 0.7561419267909275, 'f1': 0.7561419267909275, 'support': None}, 'macro_avg': {'precision': 0.6169026446811847, 'recall': 0.705555435915809, 'f1': 0.643443625831315, 'support': None}, 'weighted_avg': {'precision': 0.7901886995101063, 'recall': 0.7561419267909275, 'f1': 0.763920791201504, 'support': None}}
			------------EPOCH 11---------------
Loss:  tensor(0.5214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4443, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3784, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4351, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1772, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5638, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1065, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4571, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1238, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1758, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2609, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2296, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3161, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2494, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.7329, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4936, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2527, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3757, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1563, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1820, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6320, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0161, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1584, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0888, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0775, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1374, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0990, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1195, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4168, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1526, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8594394352414901, 'B-C': 0.42680776014109345, 'I-C': 0.48886600592096796, 'B-P': 0.7397129186602871, 'I-P': 0.8848368522072937, 'B-MC': 0.5054545454545455, 'I-MC': 0.6458209839952579}, 'recall': {'O': 0.9200149644594089, 'B-C': 0.5330396475770925, 'I-C': 0.568733153638814, 'B-P': 0.6692640692640692, 'I-P': 0.777760736196319, 'B-MC': 0.6435185185185185, 'I-MC': 0.7024500322372663}, 'f1': {'O': 0.8886961549580803, 'B-C': 0.4740450538687561, 'I-C': 0.5257838997715789, 'B-P': 0.7027272727272726, 'I-P': 0.8278507876908007, 'B-MC': 0.5661914460285132, 'I-MC': 0.6729462631253861}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.777902537615091, 'recall': 0.777902537615091, 'f1': 0.777902537615091, 'support': None}, 'macro_avg': {'precision': 0.6501340716601336, 'recall': 0.6878258745559268, 'f1': 0.6654629825957697, 'support': None}, 'weighted_avg': {'precision': 0.7909076597135766, 'recall': 0.777902537615091, 'f1': 0.7819001463268568, 'support': None}}
			------------EPOCH 12---------------
Loss:  tensor(0.1918, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1743, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2370, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3165, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2450, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6526, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3267, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5666, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3135, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1580, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2217, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2377, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1764, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6063, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1542, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6104, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3484, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2125, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1942, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5857, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0319, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1647, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1738, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1496, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0642, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1792, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4694, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1453, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8589348283111422, 'B-C': 0.34660925726587727, 'I-C': 0.40245523496755026, 'B-P': 0.7653301886792453, 'I-P': 0.9206050517478469, 'B-MC': 0.6687116564417178, 'I-MC': 0.8074679113185531}, 'recall': {'O': 0.9170968948746726, 'B-C': 0.7092511013215859, 'I-C': 0.7707397424378556, 'B-P': 0.5619047619047619, 'I-P': 0.6503067484662577, 'B-MC': 0.5046296296296297, 'I-MC': 0.44616376531270147}, 'f1': {'O': 0.8870635064230142, 'B-C': 0.4656543745480839, 'I-C': 0.5287923152000823, 'B-P': 0.6480279580629056, 'I-P': 0.7622015160140216, 'B-MC': 0.575197889182058, 'I-MC': 0.574750830564784}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7318212441050976, 'recall': 0.7318212441050976, 'f1': 0.7318212441050976, 'support': None}, 'macro_avg': {'precision': 0.6814448755331333, 'recall': 0.6514418062782091, 'f1': 0.6345269128564214, 'support': None}, 'weighted_avg': {'precision': 0.805408017091947, 'recall': 0.7318212441050976, 'f1': 0.7447235503158942, 'support': None}}
			------------EPOCH 13---------------
Loss:  tensor(0.3590, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3392, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3827, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4846, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3313, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6724, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1816, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4442, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1429, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1473, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1565, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1745, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2075, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1533, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1544, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2782, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2245, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5499, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2784, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1535, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6246, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0290, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1303, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1087, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0450, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1389, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0701, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1369, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4352, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1400, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9114285714285715, 'B-C': 0.39080459770114945, 'I-C': 0.44808800419067574, 'B-P': 0.695852534562212, 'I-P': 0.8842975206611571, 'B-MC': 0.5079872204472844, 'I-MC': 0.6641708264004437}, 'recall': {'O': 0.8831275720164609, 'B-C': 0.5991189427312775, 'I-C': 0.640461215932914, 'B-P': 0.6536796536796536, 'I-P': 0.7384969325153374, 'B-MC': 0.7361111111111112, 'I-MC': 0.7720825274016764}, 'f1': {'O': 0.8970549116473495, 'B-C': 0.47304347826086957, 'I-C': 0.5272760895025581, 'B-P': 0.6741071428571429, 'I-P': 0.8048474717927289, 'B-MC': 0.6011342155009453, 'I-MC': 0.7140727489564699}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7659106220525489, 'recall': 0.7659106220525489, 'f1': 0.7659106220525489, 'support': None}, 'macro_avg': {'precision': 0.6432327536273562, 'recall': 0.7175825650554902, 'f1': 0.670219436931152, 'support': None}, 'weighted_avg': {'precision': 0.799945029444125, 'recall': 0.7659106220525489, 'f1': 0.7768102249061822, 'support': None}}
			------------EPOCH 14---------------
Loss:  tensor(0.1969, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1304, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2843, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3738, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2399, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5001, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1198, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3586, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0906, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1225, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1275, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1168, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5440, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2284, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1105, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5272, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3418, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1290, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5593, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0328, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1438, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0958, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0378, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0648, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0442, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3606, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1114, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8673258874582058, 'B-C': 0.36997635933806144, 'I-C': 0.4714300213133056, 'B-P': 0.6410912190963342, 'I-P': 0.9113727519730883, 'B-MC': 0.4854368932038835, 'I-MC': 0.7328863796753705}, 'recall': {'O': 0.9122334455667789, 'B-C': 0.6894273127753304, 'I-C': 0.6955675351901767, 'B-P': 0.651082251082251, 'I-P': 0.7202453987730061, 'B-MC': 0.6944444444444444, 'I-MC': 0.6695680206318504}, 'f1': {'O': 0.8892130406243162, 'B-C': 0.48153846153846147, 'I-C': 0.5619744722037384, 'B-P': 0.6460481099656357, 'I-P': 0.8046147695470901, 'B-MC': 0.5714285714285715, 'I-MC': 0.6997978436657682}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.768403323602066, 'recall': 0.768403323602066, 'f1': 0.768403323602066, 'support': None}, 'macro_avg': {'precision': 0.6399313588654643, 'recall': 0.7189383440662626, 'f1': 0.6649450384247974, 'support': None}, 'weighted_avg': {'precision': 0.8051463898648903, 'recall': 0.768403323602066, 'f1': 0.7777783116256499, 'support': None}}
			------------EPOCH 15---------------
Loss:  tensor(0.1047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1543, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2421, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5094, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1874, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4430, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1838, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1755, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1305, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0923, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1519, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1233, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0782, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2254, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0831, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4697, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2191, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0746, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1296, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5083, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0234, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3853, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0580, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2727, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1069, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2190, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3558, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0908, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8855638880876838, 'B-C': 0.4230769230769231, 'I-C': 0.4917114695340502, 'B-P': 0.6527131782945736, 'I-P': 0.8849809344943372, 'B-MC': 0.6521739130434783, 'I-MC': 0.8349270256668344}, 'recall': {'O': 0.9188926300037411, 'B-C': 0.6541850220264317, 'I-C': 0.65738244983528, 'B-P': 0.729004329004329, 'I-P': 0.7949897750511248, 'B-MC': 0.5555555555555556, 'I-MC': 0.534816247582205}, 'f1': {'O': 0.9019204641427679, 'B-C': 0.5138408304498271, 'I-C': 0.5626041266179674, 'B-P': 0.6887525562372189, 'I-P': 0.8375750720422289, 'B-MC': 0.6, 'I-MC': 0.6519944979367261}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.78910846620256, 'recall': 0.78910846620256, 'f1': 0.78910846620256, 'support': None}, 'macro_avg': {'precision': 0.6893067617425543, 'recall': 0.6921180012940953, 'f1': 0.6795267924895337, 'support': None}, 'weighted_avg': {'precision': 0.8108289126295214, 'recall': 0.78910846620256, 'f1': 0.7944101873354326, 'support': None}}
			------------EPOCH 16---------------
Loss:  tensor(0.0758, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0587, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0840, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2051, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1094, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3904, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1177, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3328, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1723, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1299, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1967, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3102, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2463, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5669, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1855, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1808, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4218, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1915, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0898, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1053, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4798, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0116, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1624, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1314, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0602, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1566, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1790, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3839, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1489, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8073104693140795, 'B-C': 0.539568345323741, 'I-C': 0.6253583943931188, 'B-P': 0.6396705559368565, 'I-P': 0.8059430233086464, 'B-MC': 0.6217616580310881, 'I-MC': 0.7986611740473738}, 'recall': {'O': 0.9369996258885148, 'B-C': 0.3303964757709251, 'I-C': 0.29395028451632227, 'B-P': 0.8069264069264069, 'I-P': 0.9068507157464213, 'B-MC': 0.5555555555555556, 'I-MC': 0.5}, 'f1': {'O': 0.8673338643210861, 'B-C': 0.4098360655737705, 'I-C': 0.39991850870938167, 'B-P': 0.7136294027565084, 'I-P': 0.8534244268565516, 'B-MC': 0.5867970660146699, 'I-MC': 0.6149881046788263}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.785470469346508, 'recall': 0.785470469346508, 'f1': 0.785470469346508, 'support': None}, 'macro_avg': {'precision': 0.6911819457649865, 'recall': 0.6186684377720209, 'f1': 0.6351324912729706, 'support': None}, 'weighted_avg': {'precision': 0.7708426749825008, 'recall': 0.785470469346508, 'f1': 0.763537019718254, 'support': None}}
			------------EPOCH 17---------------
Loss:  tensor(0.4135, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1859, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1262, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3525, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1238, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3821, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0712, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2591, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1080, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0950, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1679, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0686, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1290, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1696, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0964, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.5939, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2818, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1000, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1003, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.6204, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0934, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0842, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0142, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0506, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0290, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0586, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2925, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0984, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8619552249280651, 'B-C': 0.4790697674418605, 'I-C': 0.5813055233680958, 'B-P': 0.6783582089552239, 'I-P': 0.8361194318291623, 'B-MC': 0.5789473684210527, 'I-MC': 0.775238886580806}, 'recall': {'O': 0.9189674523007856, 'B-C': 0.45374449339207046, 'I-C': 0.45073375262054505, 'B-P': 0.787012987012987, 'I-P': 0.8847648261758692, 'B-MC': 0.6111111111111112, 'I-MC': 0.6015473887814313}, 'f1': {'O': 0.8895487796045484, 'B-C': 0.4660633484162896, 'I-C': 0.5077597840755735, 'B-P': 0.7286573146292586, 'I-P': 0.8597545829400368, 'B-MC': 0.5945945945945946, 'I-MC': 0.6774369214013433}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8019537390523243, 'recall': 0.8019537390523243, 'f1': 0.8019537390523243, 'support': None}, 'macro_avg': {'precision': 0.6844277730748951, 'recall': 0.6725545730563999, 'f1': 0.6748307608088063, 'support': None}, 'weighted_avg': {'precision': 0.7924395018259296, 'recall': 0.8019537390523243, 'f1': 0.7945087051645133, 'support': None}}
			------------EPOCH 18---------------
Loss:  tensor(0.0508, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0333, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0282, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2250, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0493, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4424, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0367, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0428, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0693, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0571, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0829, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0359, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4443, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0430, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2090, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0504, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3760, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1452, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0211, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0441, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0595, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0347, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0115, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0409, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0254, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0956, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3159, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0889, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9377543691644721, 'B-C': 0.464354527938343, 'I-C': 0.5292905504857227, 'B-P': 0.7193548387096774, 'I-P': 0.8327994262491035, 'B-MC': 0.6414141414141414, 'I-MC': 0.775825117873982}, 'recall': {'O': 0.8792368125701459, 'B-C': 0.5308370044052864, 'I-C': 0.5384845762204252, 'B-P': 0.7722943722943723, 'I-P': 0.8904907975460122, 'B-MC': 0.5879629629629629, 'I-MC': 0.5834945196647324}, 'f1': {'O': 0.9075532900834106, 'B-C': 0.49537512846865367, 'I-C': 0.5338479809976246, 'B-P': 0.7448851774530272, 'I-P': 0.8606794317479926, 'B-MC': 0.6135265700483091, 'I-MC': 0.6660533578656854}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8047383786211543, 'recall': 0.8047383786211543, 'f1': 0.8047383786211543, 'support': None}, 'macro_avg': {'precision': 0.7001132816907774, 'recall': 0.6832572922377054, 'f1': 0.6888458480949575, 'support': None}, 'weighted_avg': {'precision': 0.8071878055702565, 'recall': 0.8047383786211543, 'f1': 0.8042496781399159, 'support': None}}
			------------EPOCH 19---------------
Loss:  tensor(0.0503, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0338, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0523, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1445, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0383, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3598, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0160, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2438, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0206, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0278, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0317, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0807, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0396, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4259, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0646, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1756, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0486, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3989, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1780, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0415, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0019, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0679, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0233, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0098, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0174, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0099, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0541, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2936, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0866, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9466212850738834, 'B-C': 0.5021929824561403, 'I-C': 0.552072968490879, 'B-P': 0.6983240223463687, 'I-P': 0.8, 'B-MC': 0.6578947368421053, 'I-MC': 0.7955947136563877}, 'recall': {'O': 0.8531986531986532, 'B-C': 0.5044052863436124, 'I-C': 0.49850254567235697, 'B-P': 0.7575757575757576, 'I-P': 0.9114519427402863, 'B-MC': 0.5787037037037037, 'I-MC': 0.5822050290135397}, 'f1': {'O': 0.8974853409940576, 'B-C': 0.5032967032967034, 'I-C': 0.5239219389361033, 'B-P': 0.7267441860465116, 'I-P': 0.8520970247341378, 'B-MC': 0.6157635467980296, 'I-MC': 0.6723752792256144}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7993487536492252, 'recall': 0.7993487536492252, 'f1': 0.7993487536492252, 'support': None}, 'macro_avg': {'precision': 0.7075286726951092, 'recall': 0.6694347026068443, 'f1': 0.6845262885758796, 'support': None}, 'weighted_avg': {'precision': 0.8001557755077408, 'recall': 0.7993487536492252, 'f1': 0.7960309763046771, 'support': None}}
			------------EPOCH 20---------------
Loss:  tensor(0.0784, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0475, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0516, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1778, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0450, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3995, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0272, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2373, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0144, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0201, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0244, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0518, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0193, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3615, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0265, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1728, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0442, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3962, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1635, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0642, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0816, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.4027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0078, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0545, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0275, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0205, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0068, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0294, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2341, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0733, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9196731421523281, 'B-C': 0.4928131416837782, 'I-C': 0.5488565488565489, 'B-P': 0.7193263833199679, 'I-P': 0.8326468052495449, 'B-MC': 0.6538461538461539, 'I-MC': 0.7887323943661971}, 'recall': {'O': 0.8926300037411149, 'B-C': 0.5286343612334802, 'I-C': 0.5139263252470799, 'B-P': 0.7766233766233767, 'I-P': 0.8887525562372188, 'B-MC': 0.6296296296296297, 'I-MC': 0.6318504190844616}, 'f1': {'O': 0.9059498044576071, 'B-C': 0.51009564293305, 'I-C': 0.5308174155131079, 'B-P': 0.7468776019983346, 'I-P': 0.8597853504129779, 'B-MC': 0.6415094339622641, 'I-MC': 0.7016287810989796}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8079721536043117, 'recall': 0.8079721536043117, 'f1': 0.8079721536043118, 'support': None}, 'macro_avg': {'precision': 0.7079849384963598, 'recall': 0.6945780959709087, 'f1': 0.699523432910903, 'support': None}, 'weighted_avg': {'precision': 0.8058770465848801, 'recall': 0.8079721536043117, 'f1': 0.8057369157950789, 'support': None}}
			------------EPOCH 21---------------
Loss:  tensor(0.0170, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0109, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0742, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0222, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2535, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0244, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1682, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0270, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0455, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0346, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0392, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0633, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0330, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3668, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0275, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0140, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2967, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0337, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3126, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0013, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0447, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0157, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0147, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0283, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2258, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0775, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8579121789560895, 'B-C': 0.42833607907743, 'I-C': 0.5333805811481219, 'B-P': 0.6902439024390243, 'I-P': 0.8752448313384114, 'B-MC': 0.569620253164557, 'I-MC': 0.7504927079227434}, 'recall': {'O': 0.9297418630751965, 'B-C': 0.5726872246696035, 'I-C': 0.5634920634920635, 'B-P': 0.7350649350649351, 'I-P': 0.8224437627811861, 'B-MC': 0.625, 'I-MC': 0.6137975499677627}, 'f1': {'O': 0.892383927609609, 'B-C': 0.4901036757775683, 'I-C': 0.5480230102672394, 'B-P': 0.7119496855345913, 'I-P': 0.8480231945176595, 'B-MC': 0.5960264900662252, 'I-MC': 0.6752970384820003}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.7955086458567258, 'recall': 0.7955086458567258, 'f1': 0.7955086458567258, 'support': None}, 'macro_avg': {'precision': 0.6721757905780539, 'recall': 0.6946039141501068, 'f1': 0.6802581460364133, 'support': None}, 'weighted_avg': {'precision': 0.7992468748150177, 'recall': 0.7955086458567258, 'f1': 0.7959143191253853, 'support': None}}
			------------EPOCH 22---------------
Loss:  tensor(0.0223, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0192, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0821, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0275, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2665, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0167, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1372, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0162, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0149, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0348, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0407, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0086, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2928, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0121, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1118, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0111, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2563, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0965, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0287, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3001, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0531, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0133, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0044, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0266, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2333, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0683, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9120503870086508, 'B-C': 0.4867075664621677, 'I-C': 0.5521081941129674, 'B-P': 0.7176, 'I-P': 0.8350391619816443, 'B-MC': 0.6666666666666666, 'I-MC': 0.8000863557858376}, 'recall': {'O': 0.899289188178077, 'B-C': 0.5242290748898678, 'I-C': 0.5196166516921233, 'B-P': 0.7766233766233767, 'I-P': 0.8884458077709612, 'B-MC': 0.6203703703703703, 'I-MC': 0.5973565441650548}, 'f1': {'O': 0.9056248351731153, 'B-C': 0.5047720042417815, 'I-C': 0.5353698989431459, 'B-P': 0.7459459459459459, 'I-P': 0.860915013252087, 'B-MC': 0.6426858513189448, 'I-MC': 0.684016242155777}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8081967213114755, 'recall': 0.8081967213114755, 'f1': 0.8081967213114755, 'support': None}, 'macro_avg': {'precision': 0.7100369045739906, 'recall': 0.6894187162414045, 'f1': 0.6970471130043997, 'support': None}, 'weighted_avg': {'precision': 0.8058737718805181, 'recall': 0.8081967213114755, 'f1': 0.8055186694585064, 'support': None}}
			------------EPOCH 23---------------
Loss:  tensor(0.0187, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0056, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0077, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0478, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0113, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1938, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0110, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0123, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0163, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0251, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2911, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0910, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0084, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2643, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0253, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2732, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0008, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0429, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0018, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0049, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0232, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1844, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0560, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9153816938853019, 'B-C': 0.4637964774951076, 'I-C': 0.5373204247345409, 'B-P': 0.7049689440993789, 'I-P': 0.831495913324463, 'B-MC': 0.695906432748538, 'I-MC': 0.8484848484848485}, 'recall': {'O': 0.9016835016835016, 'B-C': 0.5220264317180616, 'I-C': 0.5152740341419587, 'B-P': 0.7861471861471861, 'I-P': 0.8945807770961145, 'B-MC': 0.5509259259259259, 'I-MC': 0.5325596389426177}, 'f1': {'O': 0.9084809649453448, 'B-C': 0.4911917098445596, 'I-C': 0.5260663507109006, 'B-P': 0.7433483422022105, 'I-P': 0.8618855285193576, 'B-MC': 0.6149870801033592, 'I-MC': 0.6543870073281839}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8063328093420166, 'recall': 0.8063328093420166, 'f1': 0.8063328093420166, 'support': None}, 'macro_avg': {'precision': 0.7139078192531684, 'recall': 0.6718853565221953, 'f1': 0.6857638548077024, 'support': None}, 'weighted_avg': {'precision': 0.8060516667521238, 'recall': 0.8063328093420166, 'f1': 0.8030027891742172, 'support': None}}
			------------EPOCH 24---------------
Loss:  tensor(0.0100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0051, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0613, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0090, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2631, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0050, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1406, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0097, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0098, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0143, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0228, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0046, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2541, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0071, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1117, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0080, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2427, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0909, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0213, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0007, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0340, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0052, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0062, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0339, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2306, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0619, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8836855819339896, 'B-C': 0.45904761904761904, 'I-C': 0.5340120314669135, 'B-P': 0.7156943303204601, 'I-P': 0.8521384928716904, 'B-MC': 0.5947136563876652, 'I-MC': 0.7654462242562929}, 'recall': {'O': 0.9135054246165357, 'B-C': 0.5308370044052864, 'I-C': 0.518418688230009, 'B-P': 0.7541125541125541, 'I-P': 0.8556237218813906, 'B-MC': 0.625, 'I-MC': 0.6470019342359767}, 'f1': {'O': 0.8983481108127002, 'B-C': 0.4923391215526047, 'I-C': 0.5260998404376567, 'B-P': 0.7344013490725126, 'I-P': 0.8538775510204082, 'B-MC': 0.6094808126410834, 'I-MC': 0.7012578616352201}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8008309005165057, 'recall': 0.8008309005165057, 'f1': 0.8008309005165057, 'support': None}, 'macro_avg': {'precision': 0.6863911337549471, 'recall': 0.6920713324973933, 'f1': 0.6879720924531695, 'support': None}, 'weighted_avg': {'precision': 0.7990641360261529, 'recall': 0.8008309005165057, 'f1': 0.7994670423332619, 'support': None}}
			------------EPOCH 25---------------
Loss:  tensor(0.0108, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0045, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0695, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0075, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0044, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1103, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0043, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0089, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0137, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0297, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0047, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1547, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.3069, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1343, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0159, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2792, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0281, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0014, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0217, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1685, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0529, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9118311981914092, 'B-C': 0.4721115537848606, 'I-C': 0.5476981490270527, 'B-P': 0.713258785942492, 'I-P': 0.8446764503102532, 'B-MC': 0.6069868995633187, 'I-MC': 0.769811320754717}, 'recall': {'O': 0.9053497942386831, 'B-C': 0.5220264317180616, 'I-C': 0.518418688230009, 'B-P': 0.7731601731601732, 'I-P': 0.8768916155419223, 'B-MC': 0.6435185185185185, 'I-MC': 0.6576402321083172}, 'f1': {'O': 0.9085789374882673, 'B-C': 0.49581589958158995, 'I-C': 0.5326563581813986, 'B-P': 0.7420024927295388, 'I-P': 0.8604826167661668, 'B-MC': 0.6247191011235955, 'I-MC': 0.7093184979137691}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8089602515158321, 'recall': 0.8089602515158321, 'f1': 0.8089602515158321, 'support': None}, 'macro_avg': {'precision': 0.6951963367963004, 'recall': 0.6995722076450978, 'f1': 0.696224843397761, 'support': None}, 'weighted_avg': {'precision': 0.8067199641043814, 'recall': 0.8089602515158321, 'f1': 0.8072902593149822, 'support': None}}
			------------EPOCH 26---------------
Loss:  tensor(0.0082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0040, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0486, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2081, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1082, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0081, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0087, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0132, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0235, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0038, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2503, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0042, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1226, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0044, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2048, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0694, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0033, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0125, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2550, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0006, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0315, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0012, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0227, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1651, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0526, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9141839132742021, 'B-C': 0.4745417515274949, 'I-C': 0.546152615581507, 'B-P': 0.7010869565217391, 'I-P': 0.8282943525385055, 'B-MC': 0.680628272251309, 'I-MC': 0.8414985590778098}, 'recall': {'O': 0.9022820800598579, 'B-C': 0.513215859030837, 'I-C': 0.5112309074573226, 'B-P': 0.7818181818181819, 'I-P': 0.8907975460122699, 'B-MC': 0.6018518518518519, 'I-MC': 0.5647969052224371}, 'f1': {'O': 0.9081940051212533, 'B-C': 0.4931216931216931, 'I-C': 0.5281150901075102, 'B-P': 0.7392550143266475, 'I-P': 0.858409695536506, 'B-MC': 0.6388206388206389, 'I-MC': 0.6759259259259259}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.806534920278464, 'recall': 0.806534920278464, 'f1': 0.806534920278464, 'support': None}, 'macro_avg': {'precision': 0.7123409172532239, 'recall': 0.6808561902075368, 'f1': 0.6916917232800249, 'support': None}, 'weighted_avg': {'precision': 0.8050584848519632, 'recall': 0.806534920278464, 'f1': 0.8032266643905135, 'support': None}}
			------------EPOCH 27---------------
Loss:  tensor(0.0083, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0445, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1823, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0830, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0076, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0128, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0213, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2107, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0817, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0039, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1930, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0616, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0100, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2360, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0452, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0011, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0024, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0271, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1506, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0497, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.8938457008244994, 'B-C': 0.4745417515274949, 'I-C': 0.5621174524400331, 'B-P': 0.7014809041309431, 'I-P': 0.8412351081935328, 'B-MC': 0.631578947368421, 'I-MC': 0.7972800679983001}, 'recall': {'O': 0.9084923307145529, 'B-C': 0.513215859030837, 'I-C': 0.5088349805330937, 'B-P': 0.7792207792207793, 'I-P': 0.8844580777096115, 'B-MC': 0.6111111111111112, 'I-MC': 0.6047711154094133}, 'f1': {'O': 0.9011095031355523, 'B-C': 0.4931216931216931, 'I-C': 0.5341507506091331, 'B-P': 0.7383100902379, 'I-P': 0.8623052959501558, 'B-MC': 0.6211764705882353, 'I-MC': 0.6878093492208983}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8080170671457444, 'recall': 0.8080170671457444, 'f1': 0.8080170671457444, 'support': None}, 'macro_avg': {'precision': 0.7002971332118892, 'recall': 0.6871577505327713, 'f1': 0.6911404504090811, 'support': None}, 'weighted_avg': {'precision': 0.803724749117595, 'recall': 0.8080170671457444, 'f1': 0.8044343798021965, 'support': None}}
			------------EPOCH 28---------------
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0031, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0342, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1403, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0035, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0731, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0070, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0066, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0124, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0165, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0032, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2180, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0908, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1750, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0505, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0090, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2041, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0005, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0311, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0022, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0237, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1729, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0657, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.904380980861244, 'B-C': 0.4500907441016334, 'I-C': 0.5319488817891374, 'B-P': 0.7242798353909465, 'I-P': 0.852361510156762, 'B-MC': 0.6118721461187214, 'I-MC': 0.7868248772504092}, 'recall': {'O': 0.9051253273475496, 'B-C': 0.5462555066079295, 'I-C': 0.5485175202156334, 'B-P': 0.7619047619047619, 'I-P': 0.864519427402863, 'B-MC': 0.6203703703703703, 'I-MC': 0.6199226305609284}, 'f1': {'O': 0.9047530010096856, 'B-C': 0.4935323383084577, 'I-C': 0.5401061633736362, 'B-P': 0.7426160337552743, 'I-P': 0.8583974212543467, 'B-MC': 0.6160919540229884, 'I-MC': 0.6934727731698521}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8051875140354817, 'recall': 0.8051875140354817, 'f1': 0.8051875140354817, 'support': None}, 'macro_avg': {'precision': 0.694536996524122, 'recall': 0.6952307920585766, 'f1': 0.6927099549848916, 'support': None}, 'weighted_avg': {'precision': 0.8067679573535984, 'recall': 0.8051875140354817, 'f1': 0.805190208449581, 'support': None}}
			------------EPOCH 29---------------
Loss:  tensor(0.0071, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0294, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0061, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1257, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0664, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0028, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0058, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0119, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0160, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1985, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0034, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1214, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0036, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1762, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0589, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0079, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0347, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0010, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0194, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1442, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0546, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.915097205346294, 'B-C': 0.4600760456273764, 'I-C': 0.5354657189397305, 'B-P': 0.724979658258747, 'I-P': 0.8436808730275771, 'B-MC': 0.6470588235294118, 'I-MC': 0.8032076289553532}, 'recall': {'O': 0.9016086793864572, 'B-C': 0.5330396475770925, 'I-C': 0.5414794848757113, 'B-P': 0.7714285714285715, 'I-P': 0.8774539877300613, 'B-MC': 0.6111111111111112, 'I-MC': 0.5973565441650548}, 'f1': {'O': 0.908302868126484, 'B-C': 0.4938775510204082, 'I-C': 0.5384558111830839, 'B-P': 0.7474832214765101, 'I-P': 0.8602360724757537, 'B-MC': 0.6285714285714287, 'I-MC': 0.6851543723423924}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8072535369413878, 'recall': 0.8072535369413878, 'f1': 0.8072535369413877, 'support': None}, 'macro_avg': {'precision': 0.7042237076692129, 'recall': 0.6904968608962943, 'f1': 0.6945830464565802, 'support': None}, 'weighted_avg': {'precision': 0.8081305389143877, 'recall': 0.8072535369413878, 'f1': 0.8064266143670111, 'support': None}}
			------------EPOCH 30---------------
Loss:  tensor(0.0057, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0025, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0029, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0292, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1419, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0760, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0049, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0060, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0118, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0185, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.2037, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0998, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0030, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1715, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0469, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0023, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0072, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1799, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0004, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0413, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0009, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0027, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0020, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0215, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.1489, device='cuda:0', grad_fn=<DivBackward0>)
Loss:  tensor(0.0572, device='cuda:0', grad_fn=<DivBackward0>)
				 {'precision': {'O': 0.9011177347242921, 'B-C': 0.4861407249466951, 'I-C': 0.5710074562164037, 'B-P': 0.6930091185410334, 'I-P': 0.8292105013742773, 'B-MC': 0.6310679611650486, 'I-MC': 0.812}, 'recall': {'O': 0.9048260381593715, 'B-C': 0.5022026431718062, 'I-C': 0.49311171009284216, 'B-P': 0.7896103896103897, 'I-P': 0.8945807770961145, 'B-MC': 0.6018518518518519, 'I-MC': 0.5889748549323017}, 'f1': {'O': 0.9029680791487773, 'B-C': 0.49404117009750814, 'I-C': 0.5292085174768983, 'B-P': 0.7381626871711858, 'I-P': 0.8606561408686242, 'B-MC': 0.6161137440758294, 'I-MC': 0.6827354260089686}, 'support': {'O': 13365, 'B-C': 454, 'I-C': 6678, 'B-P': 1155, 'I-P': 19560, 'B-MC': 216, 'I-MC': 3102}, 'micro_avg': {'precision': 0.8080170671457444, 'recall': 0.8080170671457444, 'f1': 0.8080170671457444, 'support': None}, 'macro_avg': {'precision': 0.7033647852811072, 'recall': 0.6821654664163825, 'f1': 0.6891265378353989, 'support': None}, 'weighted_avg': {'precision': 0.8028801218220641, 'recall': 0.8080170671457444, 'f1': 0.8031541753431756, 'support': None}}
