Iter #50: [tensor([ 1.1440e-01,  1.1970e-01,  7.0635e-02,  3.4446e-02, -2.9395e-02,
        -4.9172e-02, -5.9674e-02,  7.9734e-02, -1.2503e-01,  8.0975e-03,
         4.7698e-02,  2.6338e-02,  4.2579e-02,  2.5453e-02, -1.8283e-02,
        -5.9403e-05,  1.7134e-02,  6.5225e-03, -2.7184e-02, -8.1346e-04,
         1.1450e-01,  1.4338e-02,  9.5577e-03,  7.1400e-02,  1.5307e-02,
        -1.1004e-02,  3.3234e-02, -1.9171e-02,  3.1838e-02,  3.2579e-03,
         1.4616e-02, -3.8977e-02, -2.1702e-02, -6.0988e-02,  1.0560e-02,
         5.0605e-02, -5.5814e-02,  2.2080e-02,  3.7412e-02,  5.8842e-02,
         1.9687e-02,  6.9713e-03,  1.4587e-02,  2.5921e-02,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2610,  0.9606, -0.0981,  0.8650,  0.0231,  0.7857,  1.9567,  0.0946,
         0.8266,  0.2475,  0.4657,  0.6397,  0.5705,  0.7096,  0.8138,  0.0950,
        -0.0220, -0.3411, -0.3781, -0.4710, -0.3086, -1.3971, -0.3687,  0.8177,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6004,  0.0771, -0.4982,  0.2857, -0.1693,  0.7885, -0.1179,  0.1055,
         0.2303,  0.0511, -0.0863, -0.1055,  0.0747,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3278, -0.3786, -0.5048, -0.0582,  0.0383,  0.3213,  0.1875,  0.0018,
         0.0984,  0.0809,  0.3482, -0.6101,  0.0178,  0.3917, -0.1630, -0.4451,
        -0.1036,  0.1715,  0.2475, -0.3443,  0.1860,  0.4885,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1593,  0.1070, -0.0213, -0.0116,  0.0417,  0.0730,  0.0733,  0.1639,
         0.1427,  0.0004,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2464,  0.0602, -0.0860,  0.1024, -0.0255, -0.0230,  0.1118, -0.0368,
        -0.2164, -0.0348, -0.0282,  0.0065,  0.0472,  0.0211, -0.0754, -0.0812,
        -0.0478,  0.1160,  0.0390,  0.0920, -0.0642,  0.5312, -0.1460,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1453, -0.0420,  0.0812, -0.0486, -0.0060,  0.0079, -0.0088,  0.0667,
        -0.0099, -0.0109,  0.0013,  0.0045,  0.0319,  0.0255, -0.0430, -0.0487,
        -0.0063, -0.0469, -0.0206, -0.0107,  0.0549,  0.0379, -0.0100,  0.0019,
         0.0185,  0.0336, -0.0229,  0.0205, -0.0009,  0.0012, -0.0193,  0.0496,
        -0.0298, -0.0056, -0.0189, -0.0371, -0.0677, -0.0336,  0.0391,  0.0768,
         0.0398,  0.0121, -0.0202,  0.0313,  0.0138], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0858, -0.0822,  0.0516, -0.1361,  0.0681,  0.0180, -0.0733, -0.0384,
        -0.1017, -0.0016, -0.0448, -0.0374,  0.0562, -0.0787, -0.0244,  0.1090,
        -0.1810,  0.0871,  0.2579, -0.1121,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3020,  0.3689,  0.2319, -0.4927,  0.1612, -0.1221, -0.1584, -0.1136,
        -0.0652,  0.2511,  0.1542,  0.1117,  0.0673, -0.0020, -0.1405,  0.0354,
        -0.0164,  0.1773, -0.2626, -0.0885, -0.4863, -0.0691, -0.2337, -0.3905,
         0.7955, -0.9546,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2918,  0.2119,  0.3125, -0.0801,  0.3335,  0.5263,  0.0520,  0.1727,
         0.0277,  0.1501,  0.2134,  0.3107,  0.1006,  0.1559,  0.1042,  0.1969,
         0.1445,  0.1159, -0.1680, -0.0059,  0.0523,  0.1070, -0.0904,  0.2349,
        -0.0363,  0.1071,  0.5195, -0.4212,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0010, -0.1268, -0.0023, -0.5427, -0.2352,  0.1327,  0.1001,  0.7072,
         0.0748,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1152,  0.0017, -0.0214, -0.2878, -0.0221,  0.2156, -0.0326, -0.0557,
        -0.0024,  0.0592,  0.1345, -0.0311, -0.0641,  0.0784, -0.0752,  0.0619,
        -0.2358,  0.2234, -0.0441,  0.0320, -0.0360, -0.0457, -0.0006, -0.0053,
         0.0640,  0.2172,  0.0928,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0934, -0.2198, -0.0263, -0.0749,  0.0163, -0.0544,  0.0609, -0.0817,
         0.0105,  0.0104,  0.0545,  0.0681,  0.0222,  0.0965,  0.0803,  0.0588,
         0.1140,  0.0229,  0.1346,  0.1076, -0.1489,  0.2183,  0.7532,  0.2891,
         0.0633,  0.0709, -0.1506, -0.0440,  0.0577, -0.0079,  0.0429,  0.1050,
        -0.0032,  0.0278, -0.0371, -0.0452,  0.1443,  0.0314,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0698,  0.0143,  0.0043, -0.0117, -0.0113,  0.0198, -0.0063, -0.0111,
         0.0155,  0.0160, -0.0342,  0.0030, -0.0054, -0.0145, -0.0150,  0.0159,
        -0.0091,  0.0044, -0.0733,  0.0197, -0.0426, -0.0164,  0.0714, -0.0609,
        -0.1332,  0.0611, -0.1275, -0.0850, -0.0266, -0.0070,  0.0131, -0.0009,
        -0.0271, -0.0354, -0.0031,  0.0123,  0.0058,  0.1404,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0372, -0.8268,  0.1409, -0.3571,  0.0660, -0.0265, -0.1044, -0.0324,
        -0.0725, -0.0758, -0.0440,  0.0275, -0.0404, -0.1239, -0.0200, -0.0325,
         0.0549, -0.0561, -0.1641,  0.2047, -0.0324, -0.1322,  0.1953, -0.1187,
         0.0359,  0.1785,  0.0674,  0.3939, -0.2219, -0.0957,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1403,  0.0128, -0.0604,  0.0927,  0.0308,  0.0773,  0.0154, -0.0026,
         0.0634,  0.0068,  0.0068, -0.0030, -0.0234,  0.0306,  0.0080,  0.0189,
         0.0197,  0.0138,  0.0253,  0.0467,  0.0222, -0.0283,  0.0222,  0.1610,
         0.2353, -0.0426,  0.0706,  0.0306, -0.0083, -0.0131, -0.0464,  0.0917,
         0.0262, -0.0024,  0.0355,  0.0080, -0.1107, -0.0976,  0.0713,  0.0033],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2077, -0.0162, -0.1335,  0.0598,  0.0263, -0.0088, -0.0552, -0.0178,
        -0.0758, -0.0166, -0.0207,  0.0146,  0.0927,  0.0165, -0.0590,  0.0530,
        -0.0465, -0.0304, -0.1383,  0.0080, -0.0854, -0.0258, -0.0656,  0.0337,
        -0.0861, -0.0035, -0.0404, -0.0103, -0.0326,  0.0033,  0.0154,  0.1040,
        -0.0095, -0.1075,  0.1545,  0.1000, -0.4061,  0.0698,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1100,  0.0294,  0.2463,  0.6000,  0.1931,  0.1477,  0.2615, -0.2047,
         0.0466,  0.0693,  0.0794,  0.2451,  0.1614,  0.1745,  0.2280,  0.2109,
        -0.1590, -0.0228,  0.0354,  0.0259, -0.1011,  0.2608, -0.1680,  0.2394,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1288,  0.1742,  0.1782,  0.0170,  0.0026,  0.0263, -0.0075, -0.0872,
         0.0263,  0.1268,  0.0396,  0.0856, -0.0324,  0.0177,  0.0040,  0.0778,
        -0.0499, -0.1387,  0.0370, -0.0694, -0.0224,  0.0518, -0.0010,  0.0303,
         0.0217,  0.0381,  0.0035,  0.0296,  0.0477, -0.0332,  0.0163, -0.1940,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0383, -0.0341,  0.1202,  0.1136, -0.0728, -0.0231,  0.1837, -0.0991,
         0.0196, -0.0076, -0.0972, -0.0421,  0.1605, -0.0521,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0522,  0.1396,  0.0652, -0.1288, -0.2015,  0.0671, -0.0091,  0.0456,
         0.1029, -0.0047,  0.1115,  0.0425, -0.0924, -0.0680, -0.0751, -0.1347,
        -0.1179, -0.0420, -0.0014,  0.0687, -0.1036,  0.2027,  0.3604,  0.0277,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4970,  0.8382,  0.3173,  0.0078, -0.2109, -0.1399,  0.1189,  0.1499,
        -0.0900, -0.0919, -0.0116,  0.0372, -0.1293, -0.1294,  0.1111,  0.0515,
         0.5808,  0.1835,  0.1216, -0.1976, -0.1946,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7635,  0.2286,  0.1603, -0.0179, -0.0322, -0.0046, -0.1272,  0.1135,
         0.0187,  0.0429, -0.0123,  0.0663,  0.0897,  0.2077, -0.0938,  0.0791,
         0.0055,  0.0020, -0.0073,  0.0620,  0.0465,  0.0249,  0.0176, -0.0267,
         0.1150,  0.1961, -0.0421,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1841, -0.1780,  0.0934,  0.1111,  0.2421,  0.0497,  0.0094,  0.0194,
         0.1787,  0.1609,  0.1534,  0.1009,  0.0655, -0.0027, -0.2638,  0.0555,
         0.4101,  0.3156,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0843, -0.1340,  0.2498,  0.4736,  0.0727,  0.1761, -0.1131, -0.0515,
         0.0199,  0.0581, -0.2435,  0.1675,  0.0035, -0.0940,  0.0249,  0.0331,
         0.0147,  0.1189, -0.0292,  0.2797,  0.0922,  0.3965,  0.0827, -0.0279,
         0.0074, -0.0549, -0.0809,  0.0427,  0.0425,  0.0207,  0.0719,  0.0455,
         0.1544,  0.0323, -0.0252, -0.0927,  0.2258, -0.0424, -0.2790,  0.4726,
        -0.1089,  0.0390,  0.0132, -0.0029,  0.1875,  0.2140,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2811,  0.0061,  0.0248,  0.0579, -0.0495,  0.0417,  0.0687, -0.0624,
         0.1256, -0.0405,  0.0249, -0.1099,  0.0699,  0.0138,  0.0331, -0.0171,
         0.0310,  0.0348,  0.0098,  0.0837,  0.0158, -0.1507,  0.0775, -0.0828,
         0.2143,  0.0252,  0.0318,  0.0939, -0.0841, -0.0071, -0.0245,  0.0204,
         0.0645,  0.0062,  0.0327,  0.0042,  0.0166,  0.0701,  0.0143,  0.0187,
        -0.0320,  0.0246,  0.0850,  0.0773,  0.0712,  0.1207,  0.0502,  0.0079],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1679, -0.0890,  0.2237, -0.0660,  0.0532, -0.4091,  0.0691,  0.2646,
         0.0031,  0.1164,  0.1128,  0.0511,  0.0776,  0.0682, -0.0047, -0.0560,
         0.0454, -0.0216,  0.0689,  0.0327,  0.1039, -0.1785, -0.1232,  0.0319,
        -0.0210,  0.0167, -0.0653,  0.0511,  0.0734,  0.0213, -0.0237,  0.1941,
         0.0579,  0.1989,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5453, -0.6707, -2.4418, -1.1308,  0.5672, -0.5965,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2720,  0.1470,  0.0387,  0.0831, -0.0763,  0.1539,  0.0172,  0.0429,
        -0.0126,  0.0042, -0.0117,  0.0020,  0.0080,  0.1344,  0.1391,  0.0756,
         0.1306, -0.0423,  0.1019,  0.0989,  0.0289, -0.2082, -0.0024, -0.0126,
        -0.0811,  0.0693,  0.0610,  0.0205,  0.1049,  0.0682, -0.0022,  0.0192,
         0.0133, -0.0412,  0.0068, -0.0146,  0.0171, -0.0606,  0.0431, -0.0104,
         0.0223, -0.0044,  0.0400,  0.0503,  0.1883, -0.1213,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1149e-02,  7.6282e-02, -3.8745e-02,  2.3048e-02,  3.9388e-02,
         2.7045e-03, -3.9355e-02,  5.5095e-05, -9.2569e-03, -6.0099e-02,
        -2.3526e-02, -6.1874e-03,  8.4525e-02,  1.9549e-02, -1.5417e-02,
        -6.7079e-03,  1.1612e-01, -3.5456e-02,  1.5589e-01,  3.3531e-03,
        -3.2887e-01,  4.0601e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3964,  0.0398,  0.1283,  0.0719,  0.5584,  0.0549, -0.1022,  0.2399,
        -0.2991, -0.2202,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1456,  0.0404,  0.0053,  0.0248,  0.1249, -0.0787,  0.0761, -0.0647,
        -0.0499,  0.0419,  0.0028, -0.0373, -0.1299, -0.1713,  0.0270, -0.1289,
         0.0312, -0.0073,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5972, -0.0667,  0.1439,  0.1540,  0.1337,  0.0579,  0.2067, -0.0016,
         0.1137, -0.1082,  0.3245, -0.0737, -0.2186,  0.1074,  0.0465, -0.1559,
        -0.2329,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1390, -0.1334, -0.0251, -0.0711,  0.0189, -0.0229, -0.0130,  0.0094,
        -0.0216,  0.1326, -0.0035,  0.0797, -0.0377,  0.2487, -0.0573, -0.1277,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2120, -0.5576,  0.1006, -0.2001, -0.2465, -0.0879,  0.2135,  0.5113,
        -0.3092, -0.3787,  0.0655, -0.0395, -0.0807,  0.0223, -0.0045, -0.0197,
        -0.2506,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3394,  0.0672, -0.1782, -0.0527, -0.2483,  0.0209, -0.0473,  0.0397,
         0.0468, -0.1878,  0.0279,  0.1140, -0.0295,  0.0201, -0.1913,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.2388,  0.0197,  0.0583,  0.0402,  0.1218, -0.0546,  0.0196,  0.1290,
         0.0308,  0.0142, -0.0704, -0.0320,  0.0175, -0.0815,  0.2414,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3036, -0.2140,  0.1447,  0.1384,  0.2234, -0.1590, -0.0727,  0.1970,
         0.0695,  0.2858,  0.0994, -0.1058,  0.0091, -0.0276,  0.0809, -0.0877,
        -0.3025,  0.0130,  0.1526,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4911, -0.1393, -0.3532,  0.0291,  0.2233,  0.1324,  0.1143, -0.0827,
        -0.0081,  0.5131,  0.3261,  0.2837,  0.1215,  0.1704,  0.3000,  0.5857,
        -0.4067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4882,  0.1141,  0.0157,  0.1508,  0.1171, -0.1346, -0.0246,  0.2577,
         0.1496, -0.1441, -0.0938, -0.1725, -0.0493,  0.0267, -0.2239,  0.1444,
         0.1987,  0.0924,  0.2768, -0.0019,  0.0178,  0.0761, -0.0242,  0.2388,
        -0.0580,  0.0842,  0.0491,  0.0087, -0.1010,  0.0121, -0.0806, -0.0380,
        -0.0940, -0.0871,  0.0789, -0.0983,  0.1109, -0.0275,  0.3814, -0.0415,
         0.0806,  0.0421, -0.0748, -0.0213,  0.1013, -0.0628, -0.2978, -0.2089,
        -0.0730, -0.2237], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5778,  0.2620,  0.6936, -0.0345, -0.1498,  0.0930, -0.1162, -0.6456,
         0.2807,  0.4823, -0.7029, -0.4627, -0.1411,  0.1708,  0.2353,  0.4283,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2654,  0.3615, -0.0038,  0.0566,  0.0019,  0.1304,  0.0147, -0.0405,
         0.1263,  0.0452, -0.0992,  0.0134, -0.0472, -0.0433,  0.0409, -0.3682,
         0.1352, -0.0951, -0.0222, -0.0423,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1036, -0.0972,  0.1084,  0.1164,  0.0042, -0.0119, -0.0079, -0.0186,
        -0.0391, -0.0134, -0.0012,  0.0004, -0.0107,  0.0036, -0.0519,  0.0358,
        -0.0146,  0.0164,  0.0201, -0.0026, -0.1033, -0.0334,  0.0655,  0.0753,
         0.0012,  0.0072, -0.0252,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1215, -1.2198,  0.1439, -0.1336, -0.4876,  0.1069,  0.0686,  0.0574,
         0.4342,  0.1632,  0.2871,  0.2124, -0.2088, -0.0900,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1839, -0.0382, -0.0207, -0.1030,  0.0710,  0.0205, -0.0785, -0.0423,
         0.0086, -0.0670, -0.0394, -0.0327, -0.2038, -0.0503,  0.0350, -0.1639,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5476, -0.0658,  0.0501, -0.2195,  0.0194,  0.0729,  0.0214,  0.1813,
        -0.1007,  0.0629, -0.0260, -0.0219,  0.0490, -0.0217, -0.0188, -0.2011,
         0.1278,  0.0258, -0.0862,  0.0239,  0.0228, -0.0024, -0.0080, -0.0143,
         0.0326,  0.1377,  0.1995,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0005, -0.0814,  0.0876, -0.0346,  0.0473, -0.0107,  0.0209, -0.0768,
         0.0823, -0.0343,  0.0165,  0.0657,  0.0396, -0.0291,  0.0343,  0.0466,
         0.0407, -0.0068,  0.0332,  0.0493,  0.0180,  0.0005,  0.0452, -0.0254,
         0.0941, -0.1785,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0452, -0.0572,  0.0443,  0.0145, -0.1574, -0.4090,  0.0153,  0.1353,
         0.1966,  0.0814, -0.1124,  0.1941, -0.3663,  0.0676,  0.0218,  0.0902,
        -0.0430,  0.1184, -0.0750, -0.1148,  0.0872, -0.1106,  0.0308,  0.0464,
         0.0992,  0.0755,  0.0253, -0.2452,  0.1270,  0.0133,  0.1085,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0175, -0.3915,  0.7790,  0.5242,  0.0159,  0.3111, -0.1897,  0.0664,
        -0.5552,  0.2210,  0.1618,  0.2687,  0.3874,  0.1954,  0.0954, -0.3258,
        -0.5928,  0.1259,  0.3191,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1840, -0.0053,  0.1515, -0.9006, -0.2110, -0.1782,  0.3200,  0.0606,
        -0.4635,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3467,  0.2448,  0.0713, -0.0780,  0.0348, -0.1624,  0.0389, -0.0835,
        -0.0237,  0.0215,  0.1264, -0.2091,  0.1057, -0.0425, -0.1258,  0.0630,
         0.0720, -0.0574,  0.0962,  0.9471, -0.1926,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0930, -0.0580,  0.0082,  0.0275,  0.0687, -0.0205, -0.1065, -0.0681,
         0.0151,  0.0214, -0.0363,  0.0527, -0.0132,  0.0301,  0.0058,  0.0445,
         0.0225,  0.0363,  0.0299,  0.0304, -0.0113,  0.0521,  0.0126, -0.0345,
         0.0818,  0.1227, -0.0256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0630, -0.0362,  0.0363, -0.0067, -0.1162,  0.1085,  0.8516, -0.3815,
         0.1306,  0.2599,  0.1163,  0.2447, -0.1692,  0.0623,  0.0964,  0.0864,
        -0.0708, -0.0898,  0.1157,  0.2095, -0.0653, -0.0630,  0.1440,  0.2625,
        -0.0565, -0.1464,  0.2106,  0.5144,  0.0531, -0.2251, -0.0879, -0.0228,
        -0.0789, -0.0432, -0.0877, -0.0455, -0.0361,  0.1104, -0.1749,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0028, -0.0641, -0.0432, -0.0318,  0.0504,  0.0754,  0.0128, -0.0800,
        -0.0181, -0.0451,  0.0488,  0.0013, -0.0783, -0.0023,  0.0283,  0.0171,
         0.0778, -0.0658,  0.0005, -0.0384,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1139, -0.2962, -0.0505,  0.2185,  0.1554,  0.4200,  0.0976,  0.0068,
         0.1485,  0.0975, -0.1947, -0.1211, -0.3389,  0.0150, -0.0477,  0.0273,
        -0.0957, -0.0385, -0.1240, -0.0966,  0.1542, -0.0489, -0.3927,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2226, -0.3584,  3.7763,  0.6971,  2.9797,  2.4229, -1.1356,  0.5678,
         0.8311,  1.5905,  0.3948, -0.0766,  0.5369, -5.0449, -2.3319, -3.9786,
        -2.0847,  0.4141, -0.3251,  1.8896,  1.3044,  0.9430,  0.2690,  2.7171,
         1.6110, -1.4279, -1.9877, -3.4243,  1.0892, -1.5291, -0.2763, -0.8012,
        -1.4404,  0.0564, -0.6569, -2.1569, -0.4951,  2.3404,  6.6615,  0.7305],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3160, -0.1260, -0.0153, -0.0427,  0.2983,  0.1027,  0.0745,  0.2444,
        -0.0758,  0.0250,  0.0131, -0.0068, -0.0148,  0.0338,  0.0562, -0.0285,
         0.0295,  0.0534, -0.0756, -0.0295,  0.0765, -0.0178,  0.0257,  0.2655,
         0.0484,  0.0416,  0.2692, -0.0282, -0.0106, -0.0386, -0.0057,  0.0470,
        -0.0812, -0.0645, -0.0488,  0.2884,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1406, -0.0686, -0.0774,  0.0783,  0.0569, -0.0610,  0.1068, -0.0042,
         0.0051,  0.0516,  0.0021, -0.0236,  0.0323, -0.0290, -0.0210, -0.0466,
         0.0015,  0.1176,  0.1809,  0.0108,  0.0467,  0.0615,  0.0719, -0.0279,
         0.0051,  0.1286, -0.3260,  0.0233,  0.0461,  0.0630, -0.1337,  0.2825,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1376,  0.0751, -0.0100, -0.0004,  0.0219, -0.0128,  0.1133,  0.0092,
        -0.0261,  0.1514,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2231,  0.0999, -0.3413,  0.0508,  0.1944, -0.0961, -0.0136,  0.0644,
        -0.1599,  0.0525, -0.1600, -0.0119,  0.1489,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.3359,  0.0008,  0.0977,  0.1063,  0.3514,  0.3463, -0.2859, -0.0393,
         0.0633,  0.0364,  0.1074, -0.1696,  0.3319, -0.1055,  0.4031,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0678, -0.0154,  0.0286, -0.0461, -0.0738, -0.0239, -0.0350,  0.0162,
         0.0230, -0.0456,  0.0571, -0.0018,  0.0407,  0.0132,  0.1140, -0.0018,
         0.0774,  0.0311,  0.0193,  0.0010,  0.0132,  0.0207, -0.0625, -0.0195,
        -0.0248,  0.0018,  0.0023, -0.0377, -0.0242, -0.0861,  0.0021, -0.0371,
        -0.0220,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4519,  0.6065,  0.4293,  0.4032,  0.0796,  1.2098, -0.3162, -0.3832,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1385,  0.0241,  0.0186, -0.1806, -0.2705, -0.0900,  0.3593, -0.0804,
        -0.1203,  0.1220, -0.0952,  0.1138,  0.0831, -0.3386, -0.1425, -0.1233,
        -0.0035,  0.1079, -0.1251, -0.1187,  0.0468,  0.0325,  0.1377,  0.3350,
         0.1777,  0.1104,  0.2206,  0.0433,  1.3609, -0.0997,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0681,  0.4136, -0.2857, -0.1348,  1.1213,  0.1160, -0.0639,  0.1182,
         0.0506,  0.0933, -0.0100, -0.0052, -1.0675, -0.0289,  0.0889,  0.3946,
         0.3014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7007e-01,  1.1132e-01,  1.9156e-01, -1.4192e-01, -1.8887e-01,
         3.8542e-01,  1.3349e-01, -6.8165e-02, -3.0619e-02,  9.0901e-04,
         1.1460e-01, -3.7239e-02,  6.7216e-02,  4.1657e-02, -1.1857e-01,
        -1.6686e-01, -9.6791e-03, -4.1010e-02,  2.1639e-01, -3.2730e-04,
        -1.6495e-01, -3.7625e-01, -1.7725e-01,  1.5885e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9386e-02,  7.0647e-02, -1.1952e-01, -6.0399e-02,  9.7704e-02,
        -1.1204e-04,  7.7832e-03, -6.0023e-02,  2.2610e-02,  4.3221e-02,
         8.9962e-03, -2.4147e-02, -4.0554e-02,  6.1262e-02,  1.7215e-02,
        -7.1600e-02, -1.5186e-04,  3.5477e-02,  1.9181e-02, -3.8655e-02,
         7.8860e-03,  6.3317e-03,  3.8794e-02,  4.5557e-02, -4.1022e-03,
         6.9948e-02,  4.0496e-02, -4.5711e-02, -1.0648e-01, -3.5517e-02,
        -6.8574e-02, -4.5403e-02,  5.6816e-03,  1.5501e-01, -1.4491e-02,
         9.0036e-02, -1.6394e-01,  3.3782e-01,  1.5519e-02, -6.1264e-02,
         1.2267e-01, -3.8251e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3048,  0.0341,  0.0186, -0.0951,  0.0481, -0.1282,  0.0791, -0.0516,
         0.0217,  0.0204, -0.0616, -0.0251, -0.0148,  0.0041, -0.0173, -0.0845,
        -0.0822, -0.0063,  0.0312, -0.0244, -0.0378, -0.0988, -0.0234,  0.0213,
        -0.0193,  0.0594,  0.0663,  0.0538,  0.0306, -0.1230, -0.0167,  0.0181,
         0.1799, -0.1301,  0.0190,  0.0700,  0.1252,  0.0325,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8700, -0.3674, -0.0036, -1.0633,  0.3007,  0.1397,  0.0614,  0.1580,
         0.0755,  0.5577, -0.0335,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5702,  1.8464,  0.1456, -0.0850, -0.2739,  0.2927, -0.0924,  0.0163,
        -0.1439,  0.1503,  0.1107,  0.0326, -0.0958,  0.0664,  0.0263, -0.0210,
         0.3749,  0.0528, -0.1159, -0.0252, -0.0266,  0.0833, -0.0232,  0.0057,
        -0.0194, -0.0238,  0.0068, -0.4128, -0.0261, -0.3471,  0.4098, -0.6398,
         0.7960,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0516e-01,  1.0529e-01, -1.2922e-02, -4.1119e-02,  6.2589e-02,
         3.0100e-02,  2.0742e-02, -5.9383e-02,  3.2444e-05,  1.1878e-01,
         1.1954e-01,  3.8175e-02, -5.0550e-02, -2.0678e-02, -8.7740e-02,
        -5.6067e-02, -1.3176e-02,  9.4972e-04, -9.2748e-02,  1.6450e-02,
         2.0458e-02, -1.1205e-02, -4.0580e-03, -6.7889e-03,  5.2437e-03,
        -1.8300e-02, -3.1517e-04,  1.6695e-02,  6.4309e-02,  4.6651e-03,
         1.0116e-01, -9.1411e-02, -3.8557e-03,  9.2039e-02,  2.1365e-02,
         3.0869e-02, -7.0046e-02, -1.7651e-04,  2.0304e-03,  4.5614e-02,
        -5.7302e-02, -4.6476e-03,  1.2912e-02,  1.0969e-02, -7.4016e-03,
         3.3845e-03,  4.0149e-02,  6.4029e-02, -2.2091e-02,  1.3061e-02,
        -2.7303e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0033e-01, -1.5411e+00, -1.3967e-01,  4.4300e-02, -6.5807e-02,
         5.8151e-02,  1.0643e-01, -9.3897e-02,  1.3473e-01, -1.4587e-01,
        -2.0504e-01, -1.1033e-02,  2.0828e-01, -2.3375e-01,  5.6937e-02,
         1.3941e-01, -3.3038e-03,  5.0332e-02, -2.9412e-02,  4.9847e-02,
        -1.0452e-01, -8.7033e-02,  9.5589e-02,  2.7638e-02,  2.4741e-02,
         5.8257e-02,  8.0021e-02,  6.1514e-03, -2.8345e-02, -2.2099e-02,
        -4.0833e-02, -1.0655e-03, -5.2327e-02, -2.5626e-02,  1.1080e-01,
        -2.2284e-01,  3.2092e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.3437, -0.0573,  0.8091,  0.2182, -0.0754,  0.0577, -0.0514, -0.2190,
        -0.2277, -0.0987, -0.1441, -0.0819,  0.0972,  0.0386, -0.0237,  0.0963,
         0.2133, -0.0467,  0.0353,  0.0405,  0.2518,  0.0879, -0.1877,  0.5143,
        -0.2364, -0.2827, -0.0614,  0.0266,  0.1008, -0.1416, -0.1157, -0.0144,
        -0.1857,  0.0905, -0.0437, -0.1848,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3816, -0.0658,  0.0295,  0.1091,  0.0123, -0.0193,  0.0828, -0.0410,
         0.0419,  0.0137,  0.0814, -0.1147,  0.0134,  0.0125,  0.0086,  0.0918,
         0.0073,  0.1077,  0.0414,  0.1107,  0.0537,  0.1639, -0.0312, -0.0383,
         0.1241, -0.0183, -0.0261,  0.0390, -0.0298, -0.0159, -0.0588,  0.2471,
         0.0021, -0.0068,  0.0324,  0.0747,  0.0657, -0.0611, -0.0245, -0.0087,
         0.0800, -0.0164,  0.0271, -0.0180,  0.0275, -0.0338, -0.0531,  0.2811,
         0.0080, -0.0500,  0.0080,  0.0262,  0.1690, -0.2354,  0.2495],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3733, -0.2369, -0.0860, -0.0023, -0.0963,  0.0266,  0.0478, -0.1029,
        -0.1970, -0.1136, -0.0338, -0.0950,  0.1348, -0.1750,  0.2083, -0.0164,
         0.0155, -0.0381,  0.0092,  0.1303, -0.0344, -0.1024, -0.0445, -0.1398,
        -0.3802,  0.0286,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4321, -0.0732, -0.3643,  0.0966, -0.1217, -0.2925,  0.1730,  0.0552,
         0.0480, -0.1200, -0.0533, -0.0849, -0.0869,  0.1748,  0.6668,  0.1514,
         0.3227, -0.0601, -0.2129,  0.2420, -0.0546,  0.4320,  0.2528,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5455,  0.5923,  0.0135,  0.6771, -0.0415,  0.2886,  0.0685,  0.2193,
        -0.1886, -0.1523,  0.2025,  0.0590,  0.2984, -0.0204, -0.1610,  0.1847,
         0.0474,  0.2164,  0.1229, -0.1431, -0.0242,  0.1071, -0.1641,  0.1407,
         0.0238,  0.0267,  0.5488,  0.2545,  0.1519,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1509,  0.0191, -0.2586,  0.0247, -0.1827, -0.4922,  0.3666,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1746, -0.0534,  0.3191,  0.0505,  0.0915, -0.5492,  0.0616, -0.1234,
         0.0745, -0.2529, -0.0287,  0.0100, -0.0293, -0.0998, -0.0634, -0.0083,
        -0.0175,  0.0242, -0.2903,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2271,  0.0117, -0.1320, -0.1289, -0.0291,  0.1061,  0.0439,  0.1019,
        -0.1392,  0.0222,  0.0230, -0.0823,  0.0045,  0.0515, -0.0430, -0.1483,
        -0.0238,  0.0041, -0.0175, -0.0428, -0.0839, -0.0396, -0.0521,  0.0379,
        -0.0716, -0.0462, -0.2307,  0.0899,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3778e-02, -1.3519e-01, -1.2417e-01,  1.6750e-01,  1.2616e-01,
         1.1236e-01,  1.7584e-01, -1.4802e-02,  3.3191e-03, -7.0073e-02,
        -1.1951e-01, -1.2891e-02, -7.2372e-02,  2.3238e-02, -1.8203e-02,
        -4.7217e-02, -1.0005e-02, -5.1500e-02,  2.9270e-02, -3.9514e-02,
        -9.2385e-02,  3.7061e-02, -4.5968e-02, -2.2099e-02, -1.3297e-04,
        -1.9248e-03,  4.8111e-02, -7.9445e-02, -1.4652e-01,  3.3335e-03,
        -2.7113e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3870, -0.2621,  0.2729,  0.3588,  0.2816,  0.1408, -0.0573, -0.1402,
         0.0065,  0.1750, -0.0117,  0.0792,  0.0499,  0.1282, -0.0240,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1300, -0.0578,  0.6454, -0.1202, -0.4895, -0.1323, -0.2614,  0.0886,
        -0.1701,  0.0160, -0.0471, -0.0037,  0.0559, -0.1541,  0.0860,  0.3714,
         0.4306,  0.1558,  0.0056,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0143,  0.0397,  0.1384,  0.1784, -0.2397, -0.1676,  0.0474,  0.1110,
         0.0852,  0.0435, -0.0123,  0.0211,  0.0366, -0.0074, -0.0812, -0.0891,
         0.0488, -0.0892,  0.0061, -0.0284, -0.0185,  0.0072,  0.0293,  0.0055,
        -0.0051, -0.0006,  0.0375,  0.0236, -0.1823,  0.0655,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.2108, -0.5274,  0.0190,  0.1834, -0.0106, -0.1764, -0.0054, -0.1101,
         0.0212, -0.3254, -0.1453, -0.0502,  0.1732,  0.0444,  0.0969, -0.2049,
        -0.0988,  0.0043,  0.1629,  0.1255,  0.0328,  0.0370,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0861,  0.3647, -0.0056,  0.0522,  0.0487, -0.0679, -0.1194, -0.0895,
        -0.0018, -0.1735,  0.1626, -0.0586, -0.0305,  0.0525,  0.0402, -0.1298,
         0.0418,  0.0302, -0.0600, -0.1091,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1364, -1.3800,  0.6419, -0.4295,  0.1749, -0.1488, -0.3001, -0.0558,
        -0.1134,  0.0323,  0.0761, -0.0092, -0.0371,  0.2861, -0.0614,  0.0271,
         0.0171, -0.0937, -0.0922, -0.1183,  0.0284,  0.2268, -0.0355, -0.3662,
        -1.6838,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3664,  0.0593, -0.1049,  0.1434, -0.0785,  0.2127,  0.1280, -0.1101,
         0.2092, -0.0504, -0.0703,  0.0932,  0.0208,  0.0573,  0.0040, -0.1003,
         0.1329, -0.0638,  0.0358, -0.1578, -0.0713, -0.0589, -0.0787, -0.0040,
         0.0216, -0.0095,  0.2063,  0.1722,  0.2053,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1548, -0.2396,  0.1468,  0.2838,  0.2911,  0.1148,  0.2768,  0.1021,
         1.3796,  0.5994,  0.5973,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1054,  0.3446,  0.0501, -0.0129,  0.0182, -0.1806, -0.0385,  0.1080,
         0.0484,  0.0762,  0.1456,  0.0650, -0.0618,  0.1272,  0.3073,  0.0938,
         0.1469,  0.0523,  0.0240,  0.0271,  0.1410,  0.2406, -0.1229,  0.1465,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2238,  0.1241, -0.1724,  0.1494,  0.0875,  0.0307,  0.0659, -0.0238,
        -0.0136,  0.0328,  0.0065,  0.0832, -0.0018,  0.0698,  0.0119,  0.0224,
        -0.0186,  0.0102,  0.0327,  0.0353,  0.0382,  0.0009,  0.1165, -0.0169,
         0.0493,  0.0097, -0.0625,  0.0259, -0.0860,  0.2324,  0.0158,  0.1308,
         0.0637,  0.0086, -0.0580, -0.0051, -0.0986,  0.0076,  0.0694, -0.0206,
        -0.0158,  0.0908, -0.0064, -0.0372,  0.0682,  0.0140, -0.0269, -0.2233],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4118,  0.1748,  0.0642, -0.0145, -0.0905, -0.0426, -0.0284, -0.1574,
        -0.6099, -0.1723, -0.3367,  0.0677, -0.1931, -0.0594, -0.1929,  0.0703,
         0.2911,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1303, -0.7655, -0.2396,  0.3836, -0.1225, -0.1203,  0.1950,  0.0885,
         0.1104, -0.2825, -0.0516, -0.2338, -0.1239,  0.3790,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0055, -0.3535, -0.1572, -0.0293, -0.0380, -0.0098,  0.0004, -0.0105,
        -0.0379, -0.2154,  0.0326, -0.0850, -0.0731, -0.0152, -0.1043, -0.0314,
        -0.0710,  0.0620, -0.0593, -0.0047, -0.0046, -0.0587, -0.0283,  0.0031,
        -0.0040, -0.0764, -0.0035, -0.0776,  0.0571, -0.0152, -0.0208,  0.0340,
        -0.1979, -0.2492,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7761, -0.2728, -0.0956,  0.1825, -0.2466, -0.2115, -0.0959, -0.4615,
         0.0500,  0.0044, -0.0677,  0.0464, -0.0657, -0.0181,  0.1187,  0.4451,
         0.0435,  0.0022, -0.0408,  0.0752,  0.1125,  0.0750,  0.0235, -0.0599,
         0.0352,  0.0598,  0.0126, -0.0267, -0.0277,  0.1526, -0.0052,  0.1247,
         0.2245,  0.1434, -0.1142,  0.1273, -0.0635,  0.2655,  0.0854,  0.0585,
         0.1060,  0.1296,  0.0297,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4146, -0.0585,  0.0998,  0.0593,  0.0770, -0.0059, -0.0119, -0.0405,
         0.0433,  0.1346, -0.0193, -0.0220,  0.0320,  0.0054, -0.0359,  0.0398,
        -0.1333,  0.0420,  0.0012,  0.0267,  0.0200,  0.0378, -0.0450, -0.0326,
        -0.0629, -0.0658,  0.2327, -0.0251, -0.0478, -0.0636, -0.0011, -0.0192,
         0.0047,  0.1138, -0.0590,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.1597,  0.8559,  0.1729,  0.1957, -0.0185,  0.0224, -0.0285,  0.0588,
         0.1103,  0.2474, -0.1326, -0.2108, -0.0216,  0.0878,  0.3448,  0.1683,
         0.0475,  0.0272, -0.0315,  0.0211, -0.0231, -0.0808, -0.2167,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0771,  0.2368,  0.1777, -0.2079,  0.0768,  0.0392,  0.0381, -0.1387,
         0.0542, -0.0067,  0.0597,  0.0943, -0.0277, -0.0449,  0.0350, -0.1694,
        -0.0162,  0.0099, -0.1318, -0.0236, -0.0243, -0.0138,  0.1216, -0.0089,
         0.0296, -0.0529,  0.0196, -0.0447, -0.0067, -0.0116, -0.0189,  0.0091,
         0.0243, -0.0571,  0.0590, -0.1170,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1333e-01,  5.4889e-01,  1.2633e-01,  4.7041e-02, -5.1412e-02,
         2.4882e-02, -8.1041e-02, -4.9472e-02, -4.4678e-04, -1.0859e-01,
        -1.1282e-01,  6.6933e-02, -3.9796e-02,  1.9094e-01,  1.0676e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3940,  0.6318, -0.0270,  0.0455, -0.0325,  0.0954,  0.1893,  0.0199,
        -0.2158,  0.1874, -0.1174,  0.4839,  0.0922, -0.0786,  0.0353,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2953,  0.1869, -0.0107,  0.0247, -0.1137, -0.0269, -0.0769, -0.2010,
        -0.1192,  0.0272,  0.1251,  0.1145,  0.4856,  0.0468,  0.0225, -0.4036,
        -0.1878,  0.0532, -0.0006,  0.0818,  0.1114,  0.0223, -0.0664, -0.0147,
         0.0291,  0.1378, -0.0033,  0.0536,  0.1775, -0.0124, -0.0513, -0.0448,
        -0.1031,  0.0896, -0.0007,  0.2151, -0.2009,  0.2064,  0.0369,  0.0130,
        -0.0590,  0.0024,  0.0390,  0.0545,  0.1911,  0.0853,  0.0093, -0.1244,
         0.0282,  0.1230,  0.1692, -0.0314], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3216,  0.7050,  0.0249,  0.0048,  0.0410,  0.1069,  0.1531,  0.0440,
         0.1809, -0.0373,  0.0447, -0.1146, -0.0226, -0.1968,  0.0213,  0.0656,
         0.0908, -0.0199, -0.1622, -0.0332,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0731,  0.3145,  0.3455,  0.0090, -0.0190, -0.0411, -0.0757, -0.0363,
         0.1135,  0.0604, -0.0269, -0.0390, -0.0868,  0.0024,  0.0145,  0.0356,
        -0.0360, -0.0522, -0.0948, -0.0532,  0.2911, -0.1890,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7817,  0.4251,  0.3117,  0.4209,  0.0669, -0.2500, -0.2762, -0.1405,
         0.1836, -0.0297,  0.4819,  0.0880, -0.0361, -0.0575,  0.2321, -0.0282,
        -0.3835,  0.1201,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3287,  0.6028,  0.0129,  0.2438,  0.0650,  0.0932, -0.0894, -0.0843,
         0.3403,  0.1356, -0.0585,  0.2341,  0.0377,  0.0544,  0.0912,  0.0459,
         0.0764,  0.1700,  0.0103,  0.1477, -0.0102,  0.0393, -0.0146,  0.0774,
        -0.0606,  0.2393,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2083,  0.0614,  0.0729, -0.0606,  0.0684,  0.0693, -0.0004, -0.1099,
         0.1600,  0.0872, -0.1398,  0.0535,  0.0121,  0.1174,  0.1228,  0.0021,
         0.0419, -0.0517, -0.0018,  0.1501,  0.1841,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1312,  0.0189, -0.0131, -0.0088,  0.0559,  0.0383, -0.0081,  0.1742,
         0.1621,  0.0590, -0.0291, -0.0020, -0.0101,  0.0015,  0.0435, -0.0071,
         0.1305,  0.0029, -0.0089, -0.0480, -0.0127, -0.0371, -0.0201, -0.0488,
        -0.0165,  0.0386, -0.0011,  0.0043, -0.0779, -0.0053,  0.0754,  0.0307,
         0.0195,  0.0328,  0.0409,  0.0424,  0.0582,  0.1577,  0.0068, -0.0223,
        -0.0081,  0.0104,  0.0356, -0.0081, -0.0305, -0.1473, -0.4044,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3155,  0.6392, -0.1258,  0.0398, -0.0370, -0.1358,  0.0078, -0.4102,
        -0.1486, -0.1407,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.1543, -0.3594, -0.0151, -0.0980, -0.0732, -0.0638, -0.1565,  0.0066,
        -0.0446,  0.0310,  0.0240, -0.0354, -0.0032, -0.0267, -0.0358, -0.0338,
         0.0197,  0.0325,  0.0162, -0.0661,  0.0451,  0.0897, -0.0917, -0.1573,
        -0.0698, -0.0045, -0.2794, -0.3000,  0.0337, -0.0472,  0.0066,  0.0120,
         0.0321, -0.0593,  0.0220, -0.0645, -0.0104, -0.0346, -0.0111, -0.0453,
        -0.0412,  0.0510, -0.1126, -0.0819, -0.0287, -0.2208,  0.0729,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1805,  0.3170,  0.0873,  0.0836, -0.1623, -0.0310,  0.1010,  0.1222,
         0.0375,  0.1279,  0.0516, -0.0444, -0.0022,  0.1956, -0.1719, -0.0047,
         0.0343, -0.0289, -0.0874,  0.0125, -0.0762, -0.0188, -0.0485,  0.0291,
         0.0099,  0.0478,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0643,  0.1288,  0.0697, -0.0349, -0.0477, -0.0727, -0.0802,  0.0677,
         0.0418, -0.0270, -0.0818, -0.0149,  0.2144, -0.1195, -0.1120, -0.0595,
         0.0462,  0.1131,  0.0082,  0.0139, -0.0040,  0.1301, -0.0038, -0.0592,
         0.0914,  0.0016,  0.0443,  0.0245, -0.0165, -0.0014,  0.0484,  0.0896,
         0.0253,  0.0641,  0.0441, -0.0518,  0.0854,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1832,  0.4859, -0.2737,  0.1352,  0.3651,  0.2817, -0.0234,  0.1583,
        -0.0739,  0.3347,  0.0528, -0.1401,  0.0426, -0.3396,  0.2306,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3081, -0.0587,  0.1908,  0.0207, -0.0412, -0.2266, -0.1314, -0.1038,
        -0.4894, -0.2586, -0.2969, -0.5470, -0.1230, -0.3508, -0.2197,  0.3286,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4648e-02,  9.5873e-01,  1.1128e-01,  8.0916e-02, -7.7471e-02,
        -3.4020e-01,  2.8098e-02, -2.4148e-01, -4.5479e-02,  4.3291e-02,
         1.4602e-01, -7.4045e-02, -4.5654e-02,  5.7565e-02, -1.4493e-02,
        -1.0893e-02,  5.3951e-03,  4.7846e-03, -1.2536e-02, -2.2189e-02,
        -4.1040e-02, -1.1984e-04,  8.4774e-02, -8.7241e-04,  3.6246e-02,
        -2.8363e-02,  7.1735e-02,  4.9576e-02, -8.9111e-02, -3.3893e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2522,  0.2825,  0.1585,  0.2086,  0.0305, -0.0014,  0.0276, -0.0131,
         0.0221, -0.0464,  0.0851, -0.1776, -0.1228, -0.1623, -0.0236, -0.1554,
         0.0605, -0.0581,  0.0715, -0.0251, -0.0201, -0.0183, -0.0134, -0.0252,
         0.0140,  0.0240, -0.0050,  0.0291,  0.0508,  0.0526, -0.0059, -0.0952,
        -0.1304, -0.0198,  0.0117,  0.0971,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8781e-01,  1.2130e+00,  1.3189e-01,  1.4416e-02, -3.1293e-02,
        -4.3831e-01,  4.9090e-02,  2.6177e-02,  1.4298e-02,  9.4809e-06,
         8.0007e-02,  2.5993e-02, -1.4046e-01,  8.6541e-03, -5.1204e-02,
        -4.1478e-03, -1.3402e-02, -2.5850e-02,  4.6837e-02, -3.0483e-02,
         1.2407e-01,  7.3712e-02,  6.7755e-02,  1.3263e-02,  6.0483e-02,
         2.6968e-02, -1.1647e-02, -1.8919e-02,  2.8018e-02,  1.8570e-02,
        -5.1427e-02,  1.6484e-02,  4.7692e-02, -3.1606e-01,  5.6091e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3591, -0.2739, -0.6627, -0.7879, -0.3215,  0.0018, -0.4745, -0.1634,
        -0.0471, -0.1912,  0.2051,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1800,  0.1305, -0.2433, -0.4606, -0.3599, -0.1659,  0.0410,  0.0754,
        -0.1034,  0.2705,  0.1253, -0.0448, -0.0084,  0.0026, -0.0394, -0.1318,
         0.0479, -0.2002,  0.1166,  0.0687, -0.0400,  0.0194,  0.1703, -0.1632,
        -0.3724,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0126,  0.0347,  0.0441,  0.0266, -0.0470, -0.0385,  0.0685,  0.0622,
        -0.0224, -0.1587, -0.0004,  0.0493, -0.0060, -0.0019,  0.0259,  0.0444,
        -0.0213, -0.0131,  0.0461, -0.2123,  0.1816,  0.0382,  0.0880, -0.0039,
         0.0901,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2845,  0.1932,  0.1032,  0.0359,  0.0120,  0.0092,  0.0198, -0.0221,
         0.0487,  0.0187,  0.0062, -0.0332, -0.0565, -0.0124,  0.0278,  0.0545,
         0.0483, -0.0040, -0.0302,  0.0118, -0.0353,  0.0048, -0.0345,  0.0027,
        -0.0052, -0.0086,  0.0231,  0.0215,  0.0238, -0.0637, -0.0153, -0.0272,
        -0.0518, -0.0079,  0.0123, -0.0303, -0.0119,  0.0035, -0.0262,  0.0060,
         0.0057,  0.0181, -0.0061,  0.0567, -0.0018,  0.0836,  0.0476, -0.0105,
        -0.0249, -0.0401, -0.0274, -0.0256,  0.0738, -0.0497], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.4264,  1.5337,  0.1042,  0.1597,  0.0567,  0.0620,  0.1149, -0.1074,
        -0.3338, -0.0485,  0.2641, -0.1238, -0.1568, -0.0937,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3987e-01,  6.9917e-01,  4.0492e-02, -3.7589e-02, -7.0140e-02,
        -2.3174e-03,  1.0401e-02,  2.1484e-01,  3.0618e-04,  4.6240e-02,
        -1.6741e-02, -1.0024e-01, -2.5537e-02, -6.7195e-02, -2.3017e-01,
        -4.7968e-02, -6.7639e-02,  5.8018e-03, -5.6515e-02,  8.3484e-02,
        -9.8891e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2154,  0.0743, -0.1904, -0.0536,  0.1032,  0.0703,  0.1165,  0.0811,
         0.1640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0539,  1.1245,  0.1376, -0.1091,  0.1427,  0.0067,  0.0364, -0.1771,
        -0.0244, -0.0099,  0.0590, -0.0692, -0.1060, -0.1573, -0.0299, -0.0959,
        -0.0821,  0.0182,  0.1414,  0.1502, -0.0023, -0.1625,  0.0138,  0.0306,
        -0.0190, -0.0077, -0.2685,  0.5021,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0882,  0.9430,  0.2055,  0.1323, -0.2865,  0.0479, -0.0504, -0.1378,
        -0.1290, -0.1184,  0.2107,  0.0805,  0.0168, -0.1094, -0.1291, -0.0290,
        -0.0447,  0.1265, -0.0171, -0.0201,  0.2333, -0.2394,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0956, -0.3772, -0.2175, -0.0647, -0.2251, -0.0609,  0.0649,  0.0122,
         0.0573,  0.1260, -0.1094, -0.0570, -0.0820, -0.1015, -0.1251,  0.0198,
         0.0276,  0.0154, -0.0758,  0.1001,  0.2016,  0.0177,  0.0321,  0.0757,
         0.1046,  0.1440,  0.0260,  0.0934, -0.0282,  0.0314,  0.1484,  0.0065,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3498e-02, -1.3850e+00, -7.2481e-02, -1.5822e-01,  3.0173e-03,
        -1.9226e-01, -9.3304e-03, -4.2671e-03,  1.5842e-01,  5.6703e-02,
         7.6546e-02, -3.1258e-02,  5.8148e-02,  1.2613e-01, -5.3171e-02,
        -1.0053e-01, -9.8371e-02, -2.0073e-02, -6.8329e-02, -6.4034e-02,
        -1.0521e-01,  6.0275e-03, -3.1977e-02,  1.2669e-01,  1.6902e-04,
         3.3706e-02,  1.0600e-01,  3.7642e-02, -4.2049e-02,  8.7261e-02,
         2.5468e-01,  1.5674e-02, -1.2399e-02,  5.3667e-02, -4.0253e-02,
         1.2342e-01,  1.0938e-01, -3.9811e-02,  1.9042e-02,  1.3923e-01,
         1.2088e-01,  3.2281e-02, -8.8115e-02, -1.1875e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3809e-01,  1.2171e+00, -2.0572e-01, -4.4632e-03, -1.5378e-01,
        -3.7447e-01, -4.4709e-04,  1.2051e-01,  2.8836e-01, -1.1410e-01,
        -1.3233e-01, -2.8788e-02,  9.5792e-03, -1.6543e-01,  1.4200e-01,
        -3.8565e-01,  3.1285e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6325,  1.8688,  0.0664,  0.2109, -0.1327,  0.3624,  0.0613, -0.1037,
        -0.0201, -0.0083,  0.0413, -0.0464, -0.0376,  0.0406, -0.1193, -0.0417,
         0.1884, -0.1056, -0.1099, -0.0672,  0.0484,  0.1115, -0.0690,  0.1192,
        -0.1972,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4749, -0.8215, -0.4622,  0.1048, -0.0266, -0.0805,  0.1775,  0.0202,
         0.2472, -0.0222,  0.1028, -0.0966, -0.0655, -0.0354, -0.1450, -0.0321,
         0.3026,  0.0780,  0.0301, -0.0339,  0.5489,  0.1879,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0241,  0.5481, -0.6434,  0.0540,  0.0569, -0.2108, -0.1146, -0.1391,
        -0.0178,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4481,  0.8800,  0.2154, -0.0850,  0.0888,  0.0499,  0.2103, -0.0521,
        -0.1126, -0.0274, -0.0582,  0.0602, -0.1439,  0.0308, -0.0640, -0.1396,
        -0.0372, -0.0941, -0.0222, -0.0205, -0.0544, -0.0335, -0.0223,  0.0066,
        -0.0126,  0.2745, -0.0503, -0.0241, -0.2137, -0.1370,  0.0801, -0.2248,
         0.0379,  0.2276,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 5.0380e-01,  2.2664e+00,  1.3397e-01, -2.5795e-01,  6.9610e-02,
         9.1240e-02, -2.7247e-01, -6.9240e-02,  1.4547e-01,  1.6688e-01,
        -6.3983e-02, -5.8618e-02, -8.8086e-02, -2.8591e-01, -1.6458e-03,
         8.7912e-02,  2.6110e-01,  1.0874e-01, -3.5128e-02,  8.4637e-02,
         4.0500e-02, -1.5538e-01, -1.5589e-01, -4.4092e-02,  3.9250e-02,
         9.5932e-03, -7.0310e-02, -3.7205e-02, -4.0525e-02,  9.4197e-02,
        -3.2494e-02, -6.2703e-03,  7.2018e-02, -7.5146e-02, -1.7617e-02,
        -9.8909e-03, -5.1463e-02, -4.3005e-02,  6.3715e-02,  3.9516e-02,
        -6.3936e-02, -4.5328e-02, -2.1715e-01, -9.6418e-02,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7391, -3.8389, -1.1190,  0.6848,  0.3507,  0.1949,  0.6735, -0.0209,
        -0.3816,  0.3502, -0.3750,  0.1721, -0.4731, -0.0818, -0.2729,  0.1706,
         0.2473, -0.2047,  0.2880,  0.5314,  0.6432, -0.1860, -0.6868,  0.3610,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5764,  5.3375, -1.4547,  0.5080, -0.5563, -0.2959,  0.3145, -0.1157,
         0.6437,  0.1816,  0.6207, -0.1479,  0.3218,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3330,  1.1589,  0.0795, -0.1363,  0.0964,  0.0284, -0.1423,  0.0119,
        -0.0577, -0.0673, -0.2039,  0.0533, -0.0324, -0.0758,  0.0460, -0.0528,
         0.0145,  0.0378,  0.0229,  0.0566, -0.0301, -0.0033,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0022, -1.6672, -0.3414,  0.2637, -0.1078,  0.0984, -0.0240, -0.1485,
        -0.1091, -0.1710,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7572e-02, -3.2719e+00,  4.7864e-01,  2.6894e-01, -1.3716e-01,
        -7.6224e-02,  4.4847e-03, -1.0390e-01, -4.6352e-04, -3.0415e-01,
         1.9548e-01,  1.0701e-01, -1.9382e-01, -2.1638e-01,  1.6789e-01,
        -4.6405e-02, -3.0318e-01, -1.0656e-01, -5.7057e-02,  9.1164e-03,
        -1.6588e-01, -1.0847e+00, -1.1048e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2624,  1.6371, -0.3476, -0.4266,  0.0848,  0.0084, -0.1831, -0.1857,
        -0.0286, -0.0530, -0.1489, -0.0322, -0.0427, -0.0849,  0.2198,  0.0022,
         0.0420,  0.0048, -0.2688,  0.2060, -0.0883, -0.1145, -0.0689,  0.0126,
        -0.0289, -0.1247,  0.0072,  0.0548, -0.0815, -0.1064, -0.1362, -0.0060,
        -0.1609, -0.0309,  0.0947, -0.0142, -0.0338, -0.0088,  0.0820,  0.1309,
         0.0648,  0.0962,  0.1339, -0.2483,  0.2923], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1077e-01,  1.3495e+00,  6.8026e-02, -3.8696e-01, -5.8000e-02,
         4.4185e-02,  2.2953e-01, -6.4780e-02, -7.0813e-02,  6.8252e-02,
         2.9097e-01,  3.3309e-02, -7.1274e-02, -5.1795e-02, -2.7970e-02,
        -3.5307e-04, -3.2585e-01, -7.3794e-02, -8.0051e-02, -1.5473e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2474e-02,  1.2231e+00, -4.8628e-01, -1.2049e-01,  4.8003e-02,
        -8.5735e-02, -2.3920e-02,  2.3302e-02,  2.4134e-02,  1.3646e-04,
         5.3066e-02, -4.3540e-02,  1.2289e-01,  7.3169e-02,  1.2366e-01,
        -1.4138e-02, -7.6430e-02,  6.2778e-02,  5.1900e-02, -1.4209e-02,
         3.3234e-02, -1.1239e-02, -1.8640e-02,  3.8281e-02,  1.5900e-02,
        -1.0022e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2063e+00,  3.1921e+00,  1.7171e-01, -3.0724e-01,  4.7145e-03,
        -3.2534e-01,  9.0022e-02, -7.4075e-02, -1.7833e-01,  9.5025e-02,
         2.4309e-02, -1.0545e-01,  3.0921e-01, -4.4187e-01, -2.1611e-01,
         3.8438e-01,  1.5852e-01,  1.2527e-01,  3.2706e-02,  1.8699e-02,
        -1.1417e-04,  4.7023e-02,  2.3674e-01,  9.0867e-02,  2.1764e-02,
         7.3131e-02, -4.8436e-01, -1.5151e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2337, -0.2779,  0.1098, -0.2420, -0.0267, -0.1623, -0.2356, -0.1633,
        -0.1095,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9101, -2.1362,  0.3502,  0.4078, -0.0942,  0.1083,  0.0852,  0.1686,
         0.1707,  0.0618,  0.0059,  0.1385, -0.0135,  0.0242, -0.1918,  0.1537,
        -0.0507, -0.0415, -0.0449,  0.0122,  0.0632, -0.0937,  0.0381, -0.0198,
        -0.1354,  0.2714,  0.1852,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.1878e-01,  1.9708e+00,  7.1805e-02,  2.3773e-01, -3.5917e-02,
        -7.0630e-02,  7.1260e-02, -1.4921e-01, -1.8216e-02, -3.7336e-04,
        -5.8380e-02, -3.0110e-01, -6.8287e-02,  1.7148e-01,  1.2307e-01,
         2.7781e-02, -3.7770e-02,  2.9051e-02, -5.0775e-02,  1.8673e-02,
         7.8644e-02, -9.2320e-02,  1.5063e-01,  1.9821e-02, -9.3085e-02,
         1.3876e-01,  8.8160e-02,  3.1633e-02, -2.3590e-02, -6.0367e-02,
         7.6665e-02,  4.7565e-02,  2.6559e-03,  2.1448e-02,  5.7511e-03,
        -2.2855e-02, -1.0851e-01,  1.3447e-01,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8861e+00,  3.0251e+00,  1.5236e+00, -1.0391e-01,  7.7817e-02,
         1.3751e-01, -1.2094e-01,  3.4142e-02, -8.2894e-02, -7.1069e-03,
        -1.4085e-04, -3.0739e-01, -6.1972e-02, -1.2710e-01, -1.7339e-01,
         1.5396e-03, -2.8618e-02, -1.6909e-02,  2.3648e-02, -9.0227e-02,
        -4.1632e-02, -6.6053e-02, -1.3057e-02,  1.0581e-01, -4.2913e-03,
         3.5244e-02,  6.8494e-04, -6.7060e-03,  1.9518e-02,  5.0704e-03,
        -2.1494e-01,  2.6614e-02,  7.3315e-02, -1.3707e-01,  3.5837e-02,
         6.7769e-02,  2.1469e-01,  2.5666e-01,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0987, -3.3032,  0.4128,  0.4924,  0.1330, -0.2115, -0.1311,  0.3839,
        -0.0639,  0.3832,  0.1161, -0.0173, -0.0873,  0.0200,  0.1433, -0.0831,
         0.1700,  0.1765, -0.0429, -0.1510, -0.0061, -0.0074,  0.1212, -0.0752,
         0.1145, -0.0085, -0.0166,  0.0652,  0.0907,  0.1462,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3320e-01, -1.6615e+00, -2.0228e-01, -1.3995e-01, -6.8240e-02,
         3.5552e-02,  2.5763e-01, -2.6838e-03,  1.8704e-02,  1.7721e-01,
         2.6777e-02, -3.1383e-04, -3.4151e-02, -3.4958e-02, -2.3095e-02,
         1.0827e-02, -7.1338e-02, -9.9876e-03,  8.3146e-02, -2.8394e-01,
        -5.9665e-03, -1.0883e-02,  3.9648e-02,  1.1065e-01,  3.5228e-02,
         3.4991e-02,  1.4613e-01,  3.4284e-02, -1.7317e-02, -1.0290e-01,
        -3.4381e-01, -8.8465e-02,  9.2378e-03, -5.7613e-02,  1.5469e-02,
         2.0246e-02,  1.7421e-02,  1.1253e-02, -3.7496e-01, -9.1986e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0304,  1.1198, -0.4576, -0.2466,  0.1623,  0.1265,  0.0311,  0.0375,
        -0.0052,  0.0485, -0.0459, -0.0118,  0.0529, -0.0518, -0.0309, -0.0572,
        -0.0119,  0.0238,  0.0541, -0.0503, -0.1079,  0.0323, -0.0458, -0.0041,
        -0.0049,  0.0330,  0.0413,  0.0354,  0.0197, -0.0643,  0.0512,  0.0407,
        -0.0124,  0.0567, -0.0957,  0.0958, -0.0204,  0.3342,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2358, -1.4069, -1.2879, -0.0609, -0.2190,  0.0825,  0.2848, -0.0500,
         0.2856, -0.0962, -0.1316, -0.3655, -0.1704,  0.0067, -0.0074, -0.5036,
        -0.2064, -0.0648, -0.1750, -0.0423, -0.1449,  0.2193,  0.3699, -0.0131,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4467, -1.5383,  0.3483,  0.0790, -0.1327, -0.2284, -0.0364, -0.0560,
         0.0184, -0.0274, -0.0968, -0.0327, -0.0023, -0.1705,  0.0169, -0.0520,
         0.0104, -0.0200,  0.0206,  0.0110,  0.0299, -0.0817,  0.0365, -0.0629,
        -0.0272,  0.0227,  0.0513, -0.0832, -0.0271, -0.0401, -0.1294, -0.1876,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2536,  1.1820, -0.0013, -0.1421,  0.0922, -0.1906,  0.1147, -0.3388,
         0.2321,  0.0551,  0.0822,  0.1358,  0.0188,  0.0386,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0788e-01, -4.3416e+00, -8.9413e-01,  5.7150e-01,  2.6862e-01,
         3.5939e-03,  1.0578e-01,  6.0112e-01,  3.1062e-02,  3.0080e-01,
         2.3526e-01, -9.1731e-02,  2.0933e-01,  4.5782e-01,  1.8545e-01,
         1.2614e-01, -2.0172e-01,  2.9026e-02,  2.5380e-01,  3.8491e-02,
        -3.4797e-02, -2.5704e-01, -3.9351e-02, -5.4932e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1424, -2.7299,  0.1214,  0.0502,  0.2291,  0.7283, -0.0426,  0.0060,
         0.0158,  0.0371, -0.1240, -0.1801,  0.0624,  0.0297,  0.0530, -0.1756,
         0.1231,  0.0053,  0.1699,  0.7190, -0.6332,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1452,  1.5494,  0.2555, -0.1843,  0.2948, -0.0941,  0.1149, -0.0024,
         0.0563,  0.0266, -0.0261,  0.0492,  0.1158,  0.1373, -0.0311,  0.3533,
        -0.0185,  0.1066,  0.0243,  0.0809, -0.0032,  0.0906,  0.0121,  0.0052,
        -0.0137,  0.1241, -0.0413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1197,  0.9743,  0.1285,  0.1504,  0.1027, -0.1649,  0.0218,  0.0226,
        -0.0077,  0.1159,  0.1130,  0.0734, -0.0169, -0.1141, -0.0117,  0.1551,
        -0.1275,  0.0182,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.4198e-01,  3.6813e+00,  1.4998e-02,  6.2905e-01,  3.8685e-02,
        -2.9126e-01,  4.7135e-02, -3.8555e-03, -6.7249e-02, -3.7409e-02,
        -5.2031e-02,  1.5557e-01,  1.5271e-01, -5.5697e-02, -9.8875e-02,
        -7.3339e-01, -2.7966e-01, -3.4580e-01,  3.5547e-02, -8.7247e-02,
        -2.4079e-02, -4.6042e-02, -5.3939e-02,  8.5563e-03, -3.6655e-02,
        -8.9423e-02,  1.0025e-01, -8.0567e-02,  5.1589e-02,  5.1897e-02,
         3.4083e-02, -6.5129e-01, -9.2770e-02,  6.2821e-02,  2.0426e-03,
        -1.7339e-01, -3.5294e-02,  8.6584e-02,  1.9078e-01,  6.2386e-01,
        -3.4961e-02, -1.9770e-01, -4.0716e-02, -1.8478e-01, -7.2326e-01,
         5.5703e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9768,  1.2113,  0.4801, -0.1660,  0.0999,  0.0859,  0.0609,  0.0704,
        -0.0037,  0.0349,  0.1075,  0.0686, -0.0324, -0.0167, -0.0184,  0.0923,
         0.0236, -0.0519,  0.1719,  0.2446,  0.1198, -0.1034,  0.1654, -0.0862,
        -0.0206,  0.0563,  0.1628,  0.0035, -0.1195, -0.0360,  0.0028, -0.1504,
         0.0347,  0.0094,  0.0477, -0.0634,  0.0034, -0.0770,  0.0246, -0.0305,
        -0.1351,  0.0820, -0.1152, -0.0073,  0.0353, -0.1182,  0.2739,  0.5718],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8658, -1.1063,  0.6014,  0.0291,  0.4460, -0.3371,  0.1385,  0.0689,
        -0.1043,  0.0368, -0.0996, -0.2159, -0.0876,  0.0147,  0.0021, -0.1570,
        -0.1574,  0.0264, -0.0179,  0.0739, -0.0763,  0.0597, -0.0911, -0.0686,
         0.0836,  0.0537,  0.0119, -0.0166,  0.2575, -0.0094,  0.0656, -0.0073,
        -0.1829, -0.5727,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0381,  1.9593, -0.4262,  0.2764, -0.0396, -0.1350,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2526e-01, -2.1284e+00,  9.6632e-01, -4.3670e-02,  1.1002e-01,
        -1.5130e-01, -1.7890e-01, -5.7429e-02, -1.6743e-01, -1.4095e-01,
         3.1155e-02,  3.3878e-02,  1.1028e-02,  2.2519e-01, -4.5322e-02,
        -1.4159e-03,  1.0437e-01,  1.0239e-01, -6.2299e-02,  4.5805e-02,
         3.2312e-01, -4.1528e-02,  1.9939e-02, -6.0254e-02, -2.1559e-01,
        -8.1039e-02,  1.3482e-01, -9.8736e-02, -3.4999e-02, -5.4789e-02,
        -3.4781e-02,  1.9322e-02, -9.2830e-02,  5.1998e-02,  6.9015e-02,
        -6.4694e-02, -9.5448e-02, -3.9612e-02,  1.2878e-03, -1.9922e-02,
         8.6994e-02,  1.9177e-02, -3.7291e-02,  1.4117e-01,  8.5263e-02,
        -7.4086e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1319, -1.0663, -0.4046,  0.0829, -0.0487, -0.1249, -0.0346,  0.0827,
        -0.0491,  0.1680,  0.0461,  0.0214,  0.0099,  0.1139, -0.1045,  0.1103,
         0.2429,  0.0182, -0.0067, -0.0221,  0.0424, -0.1858,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1155,  1.2061,  0.3711, -0.5534,  0.2287, -0.0733, -0.0894,  0.0938,
        -0.0160, -0.2564,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0077,  6.3270,  0.2325,  0.0277, -1.0887,  0.2841, -0.6710,  0.2563,
         0.7272, -0.0921, -0.3162, -0.0521, -0.7964,  0.3626, -0.3782, -0.4092,
        -1.5936,  0.2837,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6166,  1.2289,  0.0455, -0.2633,  0.4466, -0.0088,  0.1772,  0.0703,
         0.0798, -0.1542, -0.0884, -0.0216, -0.0210,  0.1291,  0.0821,  0.2904,
        -0.1363,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9473,  1.0853,  0.1534,  0.0085,  0.3969, -0.1804, -0.1427,  0.0340,
         0.0273, -0.3044, -0.2136, -0.3234, -0.3786, -0.0301,  0.0936,  0.0991,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2546, -2.4644,  0.0551, -0.2053, -0.0784, -0.1475, -0.0035,  0.2336,
        -0.2433, -0.1809, -0.1590,  0.0625, -0.0418,  0.0294,  0.1909, -0.2894,
        -0.0880,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2851, -1.9494, -0.3820,  0.1097,  0.2607, -0.1235,  0.2227, -0.1915,
         0.0430,  0.0635, -0.0032, -0.0269,  0.2356,  0.3889,  0.1606,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-3.7014e-01, -2.3527e+00,  9.5698e-02, -4.6380e-03,  1.9917e-01,
         7.6729e-02, -4.0699e-02,  5.8386e-01,  1.2804e-03,  4.9880e-02,
         1.3869e-01,  2.0144e-02,  1.7085e-01,  8.2680e-02,  3.0811e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0213, -1.1923,  0.1697,  0.0496, -0.2547,  0.0976,  0.1986,  0.1114,
         0.0143, -0.1612,  0.1328,  0.1720,  0.0341, -0.2186, -0.0673, -0.0132,
         0.0188,  0.1849, -0.0060,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7571e-01, -5.3027e+00, -6.4582e-01,  1.6884e-01,  1.2995e-01,
         2.6105e-01,  8.9871e-02, -3.9208e-01,  3.8499e-01, -7.9153e-01,
         3.1954e-01,  8.8953e-01,  5.2165e-01,  1.2050e-01, -3.3477e-01,
         4.0205e-02, -5.2541e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6119e-02, -1.7753e+00,  1.2028e-02, -3.1672e-01, -5.6890e-02,
        -1.1335e-01,  1.1194e-01, -4.2745e-03,  2.2384e-02, -1.3823e-02,
         1.5466e-01,  8.7069e-02,  8.9465e-02,  1.7185e-02,  7.6628e-02,
        -1.7531e-01, -1.7277e-02,  8.1261e-02, -1.4438e-01,  5.3260e-02,
         2.8539e-02, -1.2210e-01,  3.3964e-02,  7.1271e-03,  1.9435e-01,
        -1.0907e-02,  3.6820e-02,  3.3904e-02, -7.8141e-02,  1.0871e-02,
        -4.0582e-02,  2.0431e-02, -2.3340e-02, -2.6488e-02, -4.9865e-02,
         3.3893e-02, -8.0153e-03,  3.3329e-02,  1.0950e-02,  3.3108e-02,
         8.2458e-03,  1.6408e-03,  3.5844e-02, -3.5236e-02, -5.8672e-02,
        -6.1573e-02,  9.1828e-02,  9.3807e-02,  5.1648e-02, -9.8232e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4477,  2.7865,  0.5440,  0.4998, -0.0941, -0.0276, -0.4621, -0.2816,
        -0.0656, -0.1892, -0.1514,  0.0410, -0.0851, -0.3178, -0.4068,  0.0572,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0993, -5.0535, -0.2092, -0.0590, -0.1929, -0.1732, -0.0837, -0.0253,
        -0.1229, -0.2127,  0.2290,  0.0677, -0.0737, -0.0916, -0.2139,  0.3354,
        -0.6097, -0.1151,  0.2008, -0.1773,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0979e-01, -1.3344e+00,  3.7855e-01,  4.5256e-01, -4.2001e-01,
         3.5785e-01,  3.2720e-01,  1.0346e-01, -5.2444e-02, -7.0815e-02,
        -1.7195e-01,  3.0686e-01, -7.6810e-03, -4.0342e-04,  3.9996e-02,
        -9.5326e-02, -1.8748e-01,  5.2475e-03, -2.9803e-02,  1.3172e-01,
         4.4779e-01,  2.5656e-02, -1.0248e-01, -6.9536e-02, -1.6957e-01,
        -4.6773e-01,  1.6478e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3018, -0.5374, -0.2491, -0.1253, -0.0945, -0.1287,  0.0451, -0.0126,
         0.0785, -0.0410,  0.0728,  0.4042, -0.0319, -0.0260,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0270e-01, -2.0111e+00,  7.2303e-02,  1.8677e-02, -4.3885e-02,
        -4.4692e-02,  1.2337e-03, -1.1782e-03,  1.3625e-01,  7.4096e-02,
         1.0801e-01, -8.0597e-02,  9.1864e-01, -1.0681e-01,  3.7679e-01,
        -9.2362e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9476e-02,  3.1988e+00, -2.3747e-01,  2.6648e-01,  8.7358e-02,
         1.2720e-01,  1.0858e-01,  1.1065e-01, -3.1066e-02,  5.0040e-02,
         1.3112e-02,  8.2025e-02, -8.2114e-02,  1.6401e-02,  7.3327e-02,
         1.2557e-01,  5.1318e-02, -6.7886e-02, -9.5406e-02, -1.5141e-02,
         1.6038e-03, -7.5009e-02, -3.6223e-02, -1.0884e-01, -1.0490e-01,
        -8.1375e-02, -2.7832e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0018e-01,  3.8044e+00,  1.6007e-01, -1.9037e-01,  3.3130e-01,
         2.2197e-02, -6.4731e-02,  1.2835e-01,  1.4280e-01,  1.6130e-01,
         3.0526e-03, -2.0828e-01, -1.5263e-01,  9.1281e-02, -2.1792e-01,
        -2.9035e-01, -2.4367e-01,  4.7350e-03, -2.1621e-01,  3.1433e-01,
        -1.8956e-01,  2.4241e-03, -5.0763e-02, -2.2741e-02, -8.0064e-01,
         5.4829e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0203e-01, -1.6998e+00,  2.4966e-02, -1.7294e-02, -1.7013e-01,
         3.9068e-02, -3.9502e-02, -3.4593e-02,  1.0783e-01, -3.8755e-02,
         1.3660e-03,  5.7144e-03, -7.0236e-02,  2.3217e-02, -7.4242e-02,
        -1.1902e-01,  1.6739e-02, -9.8471e-03, -2.3826e-02, -1.3102e-01,
         1.1358e-01,  6.7006e-02,  8.8205e-02, -4.2146e-02, -1.7429e-02,
        -1.3575e-02, -2.0022e-02, -3.9999e-02,  5.7139e-02, -2.6567e-01,
        -1.6813e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.3556,  1.6481, -0.0098, -0.5369, -0.0544,  0.1607,  0.0081, -0.0326,
        -0.1242,  0.0178,  0.0874,  0.0509,  0.0207, -0.0283,  0.0257, -0.0226,
         0.0153, -0.1378, -0.0513,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8479, -1.3508, -0.1230, -0.1980,  0.1797, -0.2169,  0.3310, -0.1331,
        -0.1928,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0542e+00,  1.9447e+00,  6.7008e-02, -1.5004e-01, -1.3939e-01,
         5.5728e-03, -1.2142e-01, -2.8535e-01, -1.1689e-01, -5.4554e-02,
        -8.8568e-02,  1.7613e-01,  9.2391e-04,  2.1726e-02,  4.6849e-02,
         1.3272e-02,  2.0507e-02,  4.9538e-02, -1.8501e-02,  1.4836e-01,
         3.5110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3477,  1.8136, -0.0191,  0.0514, -0.0412,  0.1126, -0.0051,  0.0622,
        -0.1259, -0.0384, -0.0461, -0.0832, -0.0152, -0.0091,  0.0206, -0.1408,
        -0.0160,  0.0152, -0.0056,  0.0166,  0.0542, -0.1165, -0.0186, -0.0429,
        -0.1136, -0.0608,  0.0506,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0077,  0.9909,  0.1696,  0.0326, -0.0743, -0.1730,  0.0763, -0.1681,
        -0.0277,  0.0390,  0.1258,  0.0812,  0.0974, -0.0356, -0.0221,  0.0854,
         0.0226,  0.0839,  0.0825, -0.0044, -0.1072,  0.0380,  0.0596,  0.0413,
         0.0107,  0.0490, -0.0515,  0.0663, -0.0192,  0.0737, -0.0651, -0.0375,
         0.0602, -0.0310, -0.1064, -0.0138,  0.0237, -0.1435,  0.1936,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1576,  3.2188,  0.3589, -0.0765, -0.3320, -0.1580, -0.1229,  0.1299,
         0.7713,  0.8624,  0.0310,  0.9552, -0.4254,  0.1143,  0.0341,  0.1429,
        -0.0419,  0.1082,  0.3862,  0.3761,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9274,  2.5244,  0.2168,  0.0169,  0.0616,  0.3221,  0.1347, -0.2668,
        -0.1304, -0.1678, -0.0038, -0.2324, -0.1762, -0.1256, -0.2673, -0.1826,
        -0.0834, -0.0889, -0.0659, -0.0813,  0.2393, -0.7411,  0.6910,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3462e-01, -2.6128e+00, -2.0536e-02, -9.7649e-04, -9.4138e-02,
        -3.1364e-01, -6.2505e-02,  1.1987e-01, -2.2111e-03, -5.4034e-02,
         7.0869e-04,  3.9787e-02,  8.7852e-02, -8.2928e-02, -2.2570e-02,
        -7.5071e-02, -1.2332e-01,  2.7089e-02,  1.0279e-03,  1.3052e-01,
         1.2175e-01, -9.6180e-02, -1.9261e-02, -6.3571e-02, -2.2071e-01,
        -4.0137e-02, -5.8554e-02, -9.6599e-02, -1.3643e-01,  2.3743e-01,
        -3.3651e-02,  2.7864e-04, -1.5895e-01, -1.9393e-01,  1.6248e-01,
         1.2077e-01,  1.6054e-02, -2.3417e-03, -4.8765e-02,  1.0351e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7992e-01, -3.4241e+00, -4.2354e-01, -1.9034e-01, -3.7331e-02,
         2.4533e-01,  7.6673e-02, -1.5831e-01,  8.3335e-02,  6.4031e-02,
         9.0918e-03,  5.0323e-02, -1.9968e-02,  8.3578e-02,  2.0982e-02,
         4.9530e-02,  6.2861e-02,  4.4937e-02, -6.3877e-02, -8.8538e-02,
         1.6784e-01,  1.2041e-02,  1.1779e-01,  6.1833e-03, -3.8023e-02,
        -2.4881e-02, -7.8217e-02,  1.4910e-02, -8.1920e-04, -8.1910e-02,
         3.3746e-02, -1.1142e-01, -5.1186e-02,  1.2780e-01, -3.2804e-02,
        -3.9389e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4457,  1.4911,  0.1021, -0.0576, -0.0299,  0.0889,  0.0611,  0.1105,
         0.0722,  0.0848,  0.0173,  0.0190,  0.0102,  0.0218, -0.1931, -0.0345,
         0.0217, -0.0244,  0.0966,  0.1050, -0.0885,  0.0554, -0.0564, -0.0742,
        -0.0430, -0.2156, -0.2963,  0.0809,  0.0973, -0.0160,  0.2290,  0.5394,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3550, -0.7789,  0.0291, -0.0798,  0.2676,  0.0300,  0.0015, -0.0216,
        -0.3788,  0.7332,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9636,  2.2389, -0.0345,  0.2019,  0.1954,  0.2781,  0.0301, -0.0390,
         0.4021, -0.4187, -0.1680,  0.8253, -0.0854,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.5256,  1.5959, -0.1410, -0.0400,  0.1784, -0.1227,  0.1024, -0.1586,
        -0.4113, -0.1200,  0.0976, -0.0395, -0.4574, -0.3543, -0.1586,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9933,  7.1375,  1.4321,  0.5977,  0.4296,  0.6186, -0.0437,  0.3342,
         0.1650, -0.3671, -0.4137, -0.2298,  0.0216, -0.1452, -0.2964,  0.0116,
        -0.1338, -0.2861,  0.3573, -0.2649,  0.4585, -0.3063, -0.2359, -0.1381,
        -0.3515, -0.2351, -0.0985, -0.4809, -0.0793, -0.2002, -0.3768, -0.5916,
         1.6269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6171, -7.7149, -0.7292,  0.2696,  0.5612, -0.1782, -0.1947,  0.1315,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0119e-01, -4.3606e+00,  8.6396e-01,  2.9389e-01,  1.5718e-01,
         1.5440e-01,  9.0314e-02,  4.8554e-02, -1.8902e-01, -9.4330e-02,
        -3.2883e-02,  6.2131e-02,  6.8807e-02, -3.5873e-02, -1.9694e-02,
         4.3254e-01, -5.4016e-02, -6.7447e-02,  6.3760e-02, -8.2702e-02,
        -1.8274e-02, -3.7148e-02,  3.9123e-02, -1.6049e-02,  7.8819e-03,
         7.0254e-02, -5.7902e-02,  8.0518e-02,  2.1994e-01,  4.2064e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8199, -4.9570,  1.8707, -1.0170, -0.9865,  0.1408, -0.0919,  0.2353,
         0.3308,  0.4425, -0.2896, -0.1338, -0.0101, -0.0761, -0.1904,  0.0145,
        -1.4908,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0521, -2.6389, -0.5694,  0.3280, -0.1913, -0.0103, -0.2091,  0.1728,
         0.0999, -0.0243, -0.0701, -0.0611,  0.1186,  0.0038,  0.1261,  0.0727,
         0.0340,  0.0302,  0.0336,  0.0661,  0.0740, -0.0336, -0.1953,  0.3899,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4256e-01,  3.9760e+00,  3.7428e-01, -2.3017e-01,  9.1213e-01,
        -6.0861e-02,  9.6023e-01, -5.4946e-01, -6.5864e-01, -3.8886e-01,
        -8.8158e-03, -8.6777e-03, -5.9656e-02, -1.6969e-01,  8.3653e-03,
        -9.0411e-02, -4.2828e-01, -9.9924e-02,  1.8025e-02,  8.4264e-02,
         1.3987e-02, -2.3958e-02, -1.1425e-02,  1.3582e-01,  8.7529e-02,
         1.1074e-02, -5.6187e-02,  1.4253e-01,  1.0110e-01,  6.0043e-02,
        -1.0798e-01,  5.3309e-02, -5.5502e-02, -7.6408e-02,  2.0249e-03,
         1.6096e-01, -1.8131e-02,  8.8547e-01, -2.5657e-02, -2.9716e-01,
         2.7447e-02, -1.3884e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0773e-01,  1.9336e+00,  3.4462e-02, -2.2208e-02,  1.2326e-02,
        -2.2042e-01,  7.0931e-02, -8.6952e-02, -1.5365e-01,  1.3921e-01,
         1.0050e-01,  1.1595e-01, -3.4634e-02, -3.0103e-02, -3.4223e-02,
         1.7634e-03, -6.1732e-03, -8.6078e-03, -7.0148e-03,  6.9462e-02,
        -3.0077e-03, -6.3346e-02,  3.9834e-02,  1.2534e-01, -3.0084e-02,
         5.0949e-02,  4.7686e-02,  6.4732e-02,  6.8902e-02, -1.6253e-02,
         9.4341e-02, -1.4270e-01, -1.4878e-01, -2.4298e-02,  1.8401e-01,
        -1.7356e-01,  1.7052e-01, -2.7374e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3714,  5.6504,  0.2639,  0.9483, -0.3297,  0.1073, -0.2094, -0.3449,
        -0.8785, -0.8805, -0.0522,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8952e-01,  3.5654e+00, -3.1649e-01,  6.7593e-02, -2.7445e-01,
        -2.8646e-01,  1.7220e-02, -3.7226e-02, -4.7898e-01, -6.2538e-02,
        -1.1540e-01, -2.6196e-02, -1.2012e-01, -1.2525e-01, -5.1945e-02,
         1.0570e-02, -1.6688e-03,  9.4280e-02,  1.4619e-02, -6.8777e-02,
         1.5704e-01,  1.4562e-01, -7.3118e-02,  4.6915e-02, -3.0377e-01,
        -2.0524e-02, -1.5375e-01, -1.2340e-01, -9.0981e-02, -7.2293e-02,
        -3.4873e-01,  3.2511e-02,  4.2296e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6131e-01, -1.6629e+00, -1.9568e-02, -1.1105e-01, -1.8341e-01,
         9.1196e-02, -2.8000e-02, -5.0860e-02, -1.5479e-01,  1.3912e-01,
         1.1846e-01,  5.6738e-02,  4.3216e-02,  5.7779e-02,  1.4726e-01,
        -2.4160e-02,  2.5677e-02,  5.1517e-02, -9.2245e-02,  3.2754e-02,
        -2.8666e-02, -5.7568e-02,  2.1418e-02,  3.6659e-02, -3.4907e-02,
        -1.9046e-02, -3.8600e-03,  1.9338e-02,  2.7006e-02, -4.3993e-02,
        -1.5041e-02, -8.8906e-02, -1.3428e-01, -1.6570e-01, -9.5305e-02,
        -3.0719e-02, -2.5012e-02, -1.6784e-02,  5.8044e-03, -1.1404e-01,
        -8.6216e-02, -6.7033e-03,  1.2030e-02,  7.7545e-04,  8.0390e-02,
        -5.8334e-03,  5.4616e-02, -4.4338e-02,  3.1387e-02,  1.4107e-01,
         5.4488e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2705e-02, -2.2914e+00,  5.5961e-01, -1.2824e-02, -6.0827e-02,
         2.2435e-01,  1.9333e-02,  1.0350e-01, -8.1361e-02,  2.9303e-02,
        -1.0470e-01,  2.7191e-02, -5.5135e-02, -9.9694e-03,  2.9358e-02,
         8.3208e-02,  4.9943e-02,  1.5795e-03,  1.6526e-02, -3.0234e-02,
         9.4213e-02, -1.2671e-02, -1.7678e-01, -4.0478e-02, -1.2201e-01,
         7.8356e-03,  3.8145e-05, -7.8825e-02,  6.9754e-02, -1.2081e-02,
         1.4211e-01, -2.1074e-02, -5.3105e-04,  2.3120e-01,  7.5440e-02,
        -2.9389e-02, -4.0472e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.9555,  2.0371,  0.1933, -0.2444, -0.2276, -0.0722, -0.0668, -0.0798,
         0.0103, -0.0448, -0.1708, -0.0544, -0.0797,  0.0508,  0.0511, -0.0064,
        -0.0565, -0.0778, -0.1588, -0.0897,  0.0412, -0.1262, -0.0587, -0.0898,
         0.0128,  0.1139,  0.1032, -0.0183, -0.0410,  0.0365,  0.0722,  0.0061,
        -0.0651, -0.0159,  0.1480,  0.1090,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5845e-01, -2.9109e+00, -2.9861e+00, -6.9609e-01,  5.5740e-01,
         5.3316e-02,  5.6617e-02, -1.2979e-01, -1.1866e-01,  7.4964e-02,
        -5.5590e-02,  1.7132e-01, -1.2834e-03,  5.7229e-02, -2.0808e-01,
         6.0565e-02, -4.1116e-02,  3.8639e-02, -3.7749e-02,  7.3983e-02,
        -1.1153e-01,  7.5283e-01,  3.2213e-03, -1.8357e-02,  4.4569e-02,
        -5.9913e-02,  1.6255e-02, -1.4549e-02, -3.1587e-01,  9.9458e-03,
        -1.5230e-01, -5.5529e-02, -1.3439e-02, -7.3826e-02, -8.4086e-02,
        -3.0549e-03, -5.6566e-02, -7.5502e-02, -6.0636e-02,  6.2098e-02,
        -6.9485e-02, -4.3643e-02,  1.6887e-03,  1.9318e-02, -5.2043e-02,
        -5.9098e-02, -2.6312e-02,  1.0947e-01,  6.6614e-02,  4.5145e-02,
        -2.6159e-02, -4.0214e-02, -3.1426e-01,  6.4411e-01, -2.5745e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6575e-01, -1.5569e+00,  1.1773e-02, -7.3792e-02, -7.2578e-05,
        -1.3310e-02, -4.9474e-02, -6.4403e-02,  3.9517e-02, -6.6252e-02,
        -1.3308e-01, -1.0361e-01,  1.1157e-01,  2.7395e-02, -3.1946e-02,
         1.8499e-01, -5.5535e-02, -6.3278e-03, -6.3973e-02,  7.1192e-02,
         5.3881e-02,  8.2842e-02, -2.6930e-02,  1.7799e-02, -2.0522e-01,
        -1.2267e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9540e-01, -3.0348e+00,  3.9820e-02,  1.6686e-01,  3.4058e-01,
         3.2182e-01,  1.2283e-01,  7.5179e-04, -1.9257e-02,  9.5866e-02,
         4.8025e-02,  1.2987e-01,  2.3188e-01, -1.0434e-01,  7.7179e-01,
         8.3781e-02, -1.3669e-02,  3.6195e-01,  3.4474e-01,  3.1821e-01,
         2.4313e-01,  6.1610e-03,  2.0305e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0599e+00,  6.9919e+00,  7.7586e-01, -2.7966e-01,  1.6308e-03,
        -2.6499e-01,  4.3394e-01, -3.1324e-01,  2.3626e-01,  2.3555e-01,
        -8.1695e-02,  4.3741e-01,  2.9034e-01,  1.3820e-01,  1.1421e-01,
        -3.5935e-02, -2.6823e-01,  1.6346e-01,  1.3299e-01,  2.2288e-02,
        -2.3253e-01, -4.0987e-02,  2.4402e-03, -2.2374e-01, -1.0042e-01,
         2.9400e-01,  6.6177e-02,  3.1031e-01, -6.8114e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7432,  4.3605,  0.8378,  0.3960,  1.3624,  0.4711, -0.1614,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1239, -1.1490,  0.2955, -0.1059,  0.2390, -0.1195,  0.1443, -0.2849,
        -0.0736, -0.0637, -0.2093, -0.3355, -0.1063, -0.0110, -0.1358,  0.0861,
         0.0945,  0.2981, -0.4402,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5850e-01,  2.9393e+00,  9.8746e-02, -1.1882e-02, -2.7458e-01,
        -9.2232e-02,  2.1718e-02,  2.4994e-02,  7.5523e-02,  7.2867e-02,
         2.2618e-02, -4.6766e-02,  2.4561e-03,  2.9233e-01,  1.9324e-01,
         7.9556e-02,  6.8373e-02,  5.2888e-05,  2.3850e-01,  1.6085e-02,
         9.8822e-02, -4.9682e-02, -1.2339e-02, -5.3585e-02, -8.7997e-02,
        -3.9422e-02, -1.7611e-02,  4.7172e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3300,  2.0885, -0.0599,  0.2898,  0.3486, -0.0756,  0.1768,  0.0082,
         0.0860,  0.0925, -0.0252,  0.0312, -0.0482,  0.0408, -0.2732,  0.0283,
        -0.0740, -0.0635,  0.0439,  0.0778, -0.0963,  0.0186,  0.0999, -0.0460,
         0.0314,  0.0514, -0.1056, -0.0884, -0.0067, -0.1161,  0.1942,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1375,  5.8040,  0.2538, -0.4339,  0.2504, -0.2143, -0.7180, -0.4793,
        -0.0312, -0.4821, -0.1996, -0.0340,  0.0780,  0.2241,  0.1910,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8157,  4.2585,  0.9998,  0.4919, -0.0388,  0.0635,  0.3018, -0.5241,
         0.0729, -0.2782,  0.1162, -0.0337,  0.0255,  0.1766, -0.0496,  0.3118,
         0.3167,  0.2255, -0.3688,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2489, -4.9389, -0.1177,  0.0614, -0.4923, -0.1906,  0.0334,  0.0198,
         0.2460, -0.0229, -0.1157, -0.0745, -0.1456, -0.0690,  0.1135,  0.0935,
        -0.0549, -0.0397,  0.0496, -0.0148,  0.0638, -0.0925,  0.0143,  0.2028,
         0.0642,  0.0341, -0.1124, -0.0500,  0.1560, -0.1960,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.0913e+00, -1.7404e+00,  3.8732e-01, -3.4380e-01, -1.8785e-01,
        -3.5420e-02,  3.0317e-01,  1.5041e-01,  1.0438e-01,  1.5991e-02,
         1.6707e-01,  1.8042e-01, -2.1731e-02, -3.6048e-02, -9.8274e-02,
         4.2952e-03,  6.2925e-02,  9.0485e-02, -1.0858e-02,  1.2491e-03,
         2.1561e-01, -1.1393e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8616e-01,  8.7113e+00,  4.4890e-01, -1.2655e-01,  4.8938e-01,
        -4.0765e-02, -5.0607e-01,  1.1326e-01, -2.6134e-01, -3.3956e-01,
        -1.2224e+00,  2.7419e-01, -6.9545e-01,  6.1390e-03, -2.2452e-01,
        -1.4319e-01,  3.6618e-02, -4.7920e-01, -1.0183e-01, -8.0648e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1267e-01,  4.8378e+00,  6.4266e-01,  1.2079e-01,  7.1341e-01,
         4.3751e-02, -9.5544e-02,  2.8755e-02,  1.9581e-01,  1.1257e-01,
         3.3325e-01, -1.0715e-01,  1.2276e-02, -2.8648e-01,  2.2350e-01,
        -3.3020e-03,  2.7121e-01,  4.3131e-01,  2.3819e-01,  4.3578e-02,
        -1.0758e-02,  2.6492e-01, -7.3090e-02,  1.7683e-01, -1.7003e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2110e-01,  1.5447e+00,  2.6621e-01, -3.8730e-01, -2.8789e-02,
         5.3898e-02,  2.4691e-02,  5.6615e-03,  5.2018e-02, -5.2007e-02,
         2.2252e-01,  1.3025e-01,  2.8490e-02,  7.6407e-03,  5.3450e-02,
         1.4986e-01, -3.4280e-02,  1.0791e-04, -7.2665e-02, -4.1650e-02,
         6.9960e-02, -8.9921e-02,  4.2281e-02, -9.9656e-03, -9.9296e-02,
         2.7100e-02,  1.0641e-01,  1.2375e-01, -1.5374e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0766,  7.2186, -0.4179, -0.5784, -0.1124, -0.1809,  0.4968, -0.1680,
        -0.8643,  0.1164,  0.7639,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3551,  2.7079,  0.1892,  0.0915,  0.1673,  0.1983,  0.0564, -0.1399,
         0.2290, -0.1363,  0.1362,  0.0941, -0.0899,  0.0658,  0.0495, -0.0648,
         0.2608,  0.0981,  0.6347,  0.1671,  0.0680, -0.0081, -0.0612, -0.6043,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6950e-02, -2.8694e+00, -7.1531e-01,  2.1224e-01,  2.3012e-01,
        -3.2797e-02, -1.9962e-01,  8.6373e-02,  1.7266e-01,  5.8899e-02,
         6.1471e-03,  6.8028e-02,  3.8647e-02,  5.2271e-02,  3.2863e-02,
        -3.7119e-02,  4.0004e-02, -1.6156e-03, -2.7749e-02, -2.2090e-02,
         6.4233e-02,  3.8840e-02,  2.3031e-02, -7.1471e-02, -5.1349e-02,
         4.2149e-02,  1.7056e-02, -9.8979e-02, -6.1964e-02,  1.0374e-01,
        -3.1913e-01, -4.1678e-02,  1.0246e-02,  1.0483e-02,  9.3697e-02,
         6.2080e-02,  2.2691e-01,  7.8805e-02,  4.0036e-02,  6.8928e-02,
         5.8797e-02, -1.5662e-01, -2.6626e-02, -7.2326e-03,  8.8649e-02,
         9.9440e-02,  9.5614e-02, -3.2070e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7566, -2.5665, -0.6541, -0.0162, -0.1092,  0.2111,  0.1151,  0.2516,
         0.2774, -0.0087,  0.0947,  0.0633,  0.0319,  0.1242,  0.1666, -0.1377,
         0.0592,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0296, -5.2811,  0.4544,  0.3798, -0.0752, -0.1873, -0.1579, -0.3265,
        -0.0737, -0.2817, -0.2220, -0.0650,  0.3035, -0.3005,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1336e-01,  3.1569e+00,  1.9549e-01,  9.2931e-02,  1.6329e-02,
         2.5103e-04,  6.2475e-03,  2.1930e-02,  5.9637e-02,  1.4350e-01,
        -5.9578e-02, -1.2100e-01, -2.2933e-02,  1.5453e-02, -1.2199e-01,
         4.5857e-02, -9.4302e-02,  1.6279e-02,  8.8999e-02, -9.0315e-02,
        -7.1154e-02,  5.8879e-02,  5.3506e-02, -2.5790e-02, -1.2696e-01,
        -9.2689e-02,  9.9101e-03, -4.8987e-02,  2.4377e-02, -2.5515e-02,
         2.9994e-02, -1.3444e-01,  2.2527e-01,  2.3660e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3452e-01,  4.4619e+00, -2.3955e-01,  3.7358e-01,  3.1225e-01,
        -1.6802e-02, -1.9938e-01,  1.2910e-01,  1.0806e-01, -8.7808e-02,
        -2.1053e-02, -1.1009e-01,  2.1189e-02,  4.3529e-02,  1.0092e-01,
         2.4384e-01, -1.4150e-03, -3.5848e-02, -2.6058e-02, -6.8739e-02,
        -3.4368e-02, -3.1434e-02, -2.1145e-02,  4.1625e-02, -2.3914e-01,
        -2.1782e-01, -8.2753e-02, -8.5252e-04,  3.5032e-02, -1.0209e-01,
         1.9103e-02, -1.6943e-02, -1.4820e-01,  5.9267e-02,  9.9601e-03,
        -6.7246e-02, -3.2347e-02, -4.6396e-02, -1.8260e-01, -1.9492e-02,
        -3.5968e-02,  7.9780e-01,  4.0976e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4933e-02,  3.0656e+00,  4.3165e-01,  5.0878e-02, -4.9596e-02,
        -1.8762e-02,  1.7496e-02, -3.9093e-02,  3.5626e-02, -3.3090e-02,
        -1.6269e-01,  1.3132e-02, -5.9046e-02,  9.2983e-02, -1.0650e-02,
         2.4271e-02,  2.9868e-02,  4.1876e-02, -7.1641e-03,  4.4807e-02,
         5.9222e-02, -1.6455e-03, -1.1023e-01,  2.0979e-02,  9.1558e-04,
        -1.4535e-02, -6.5413e-02, -3.1711e-03, -9.3051e-04,  7.1427e-02,
         1.3831e-01,  2.1493e-02, -2.0298e-01, -2.7713e-01, -1.6929e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 4.1545e-01,  3.3746e+00, -9.8179e-02, -2.8316e-01, -3.2298e-02,
        -1.7458e-01,  9.8765e-02, -2.6952e-02,  1.7496e-03, -2.9175e-02,
        -6.0004e-02,  5.6075e-02,  2.7405e-03,  1.1872e-01,  3.6537e-01,
         1.4632e-01, -1.4844e-02, -2.7215e-02, -1.0274e-01, -1.0547e-01,
         5.0924e-02, -3.0239e-01, -5.6790e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6333e-01, -4.2703e+00, -8.7361e-01,  4.9384e-01,  5.8433e-01,
        -4.7751e-02,  1.4572e-01,  5.9041e-02,  2.8733e-01,  1.2706e-01,
         1.1013e-01,  1.5132e-01, -1.7373e-01,  4.1593e-02,  8.5416e-02,
         3.9938e-02, -1.6992e-01,  4.8733e-04,  3.6516e-01,  1.3000e-01,
        -3.6156e-02, -6.7817e-02,  1.8544e-01,  1.5383e-01,  8.4437e-02,
         2.9092e-02, -5.4129e-02,  1.2581e-01,  1.2903e-01,  1.6914e-02,
        -5.1339e-02,  4.8401e-03,  7.4609e-02,  3.9564e-02, -6.9813e-02,
        -9.6595e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9524,  2.8026,  0.0955, -0.3320,  0.0322, -0.1108,  0.1673,  0.0513,
         0.1201, -0.1197,  0.0489, -0.1869,  0.0747, -0.4053, -0.1885,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2598, -5.3592, -0.3372, -0.1262,  0.0985,  0.1099,  0.5486, -0.1468,
         0.0365, -0.3034,  0.1715,  0.3736,  0.2481,  0.4283, -0.4792,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3840, -3.0434, -0.1105,  0.0663,  0.0807, -0.1179, -0.0629,  0.1658,
         0.1001, -0.0387,  0.1585, -0.0508, -0.2105, -0.0179,  0.0970, -0.1275,
        -0.0915, -0.1054, -0.0226,  0.0835, -0.0935,  0.0798,  0.0876,  0.0755,
         0.1639,  0.2520, -0.0109, -0.0370, -0.0709, -0.0618, -0.0296, -0.0448,
        -0.1073, -0.0528,  0.0927,  0.1695,  0.0220,  0.0581, -0.0526,  0.0282,
         0.1034, -0.2125, -0.0408, -0.0154, -0.0825, -0.1266, -0.0429,  0.0637,
        -0.0090,  0.0839, -0.1401,  0.1045], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5408, -2.7375, -0.0698,  0.1860,  0.1396,  0.1535,  0.0314,  0.0609,
         0.1047, -0.2693,  0.0427,  0.0321,  0.0245,  0.0174,  0.0323, -0.0289,
        -0.0162, -0.1357,  0.3311, -0.0754,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0036, -1.2564, -0.0207,  0.1004, -0.0699,  0.0769,  0.0793, -0.0405,
        -0.1021, -0.0124,  0.0427, -0.0859, -0.1167, -0.0035,  0.0186,  0.0424,
         0.0281, -0.0869,  0.2240,  0.0887, -0.2930, -0.2727,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0609,  4.6610,  1.0416, -0.0406,  1.0849,  0.4416,  0.0918,  1.4754,
        -0.2239,  0.2089, -0.7333,  0.5054,  0.0263,  0.1500,  0.0218,  0.3879,
         0.0238, -0.0159,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9311, -4.6564, -0.0624, -0.1160,  0.0095, -0.0799,  0.3605,  0.5484,
         0.3814, -0.0251,  0.0125, -0.0261,  0.1934, -0.0081, -0.0107,  0.0293,
        -0.1406,  0.0387, -0.0324, -0.1386,  0.1902,  0.1098,  0.1235, -0.0443,
         0.8615,  0.3779,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4582e-01, -3.0451e+00, -2.6727e-01,  2.9944e-01, -1.4314e-01,
        -1.1359e-01, -2.4787e-01, -2.7331e-03, -3.0866e-02, -7.1580e-02,
        -5.0183e-01,  6.9707e-02,  9.6446e-02,  2.3684e-02,  7.0608e-02,
        -9.0495e-02, -5.4261e-02, -1.4523e-01, -1.0313e-01,  3.9409e-01,
        -4.6644e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1004, -1.4706, -0.2080,  0.3264,  0.0550, -0.3636, -0.7846,  0.1969,
        -0.0202,  0.4745,  0.1285,  0.0264,  0.0463,  0.4359,  0.0372,  0.3160,
         0.2385, -0.0904, -0.0321, -0.1787,  0.1676, -0.0336, -0.2133, -0.1062,
         0.2732,  0.0841, -0.0017, -0.0284, -0.1316, -0.0060, -0.3731, -0.0773,
         0.0180, -0.0377,  0.0081, -0.0840, -0.0490,  0.0261,  0.0987, -0.0660,
         0.0225, -0.0564,  0.0031,  0.2239,  0.0370,  0.1118,  0.4723,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6659,  1.8684,  0.8611,  0.0305,  0.1237, -0.3691, -0.2717, -0.8894,
        -0.8818,  0.3165,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.8940e-01, -2.7556e+00, -2.6173e-01, -1.4256e-01, -9.7228e-02,
         9.0467e-02,  5.2850e-02,  1.5906e-02,  2.5354e-02, -1.1382e-01,
        -6.0023e-02, -1.2856e-03,  2.8166e-02,  1.0458e-02,  1.0954e-02,
         1.4203e-01,  1.6600e-03,  3.0077e-04,  5.7980e-04,  5.2655e-02,
         6.3898e-02,  1.1507e-01,  8.5552e-02,  3.0683e-02, -1.1748e-02,
        -2.4514e-02,  1.0511e-01, -2.9906e-02, -1.1199e-02, -1.1589e-02,
         4.7002e-02,  8.9962e-03,  4.2106e-02,  3.8494e-02,  3.6377e-02,
         1.6261e-01,  4.7447e-02,  9.0493e-02,  9.2665e-02, -5.8260e-02,
         9.9200e-03,  4.3889e-02,  1.9366e-02, -4.1610e-02, -3.2117e-02,
         4.3615e-02, -1.2491e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2307,  7.5614,  0.8889,  0.1850,  1.1379,  0.1628, -0.4029,  0.4942,
         0.0402,  0.0482, -0.0473, -0.2389, -0.0755, -0.3148,  0.1962,  0.1356,
        -0.0804, -0.0534,  0.2766,  0.0736, -0.0462, -0.1763, -0.0104,  0.2092,
         0.2144, -0.4746,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2227, -2.4445, -0.2605, -0.3277,  0.0453,  0.4324,  0.0145, -0.0397,
        -0.0383, -0.0469,  0.2674, -0.0984, -0.0763,  0.0716,  0.0749,  0.2862,
         0.0361, -0.0106,  0.0715,  0.0536,  0.0641, -0.1824,  0.2193,  0.0382,
         0.0712,  0.0710,  0.0224,  0.7401,  0.0311,  0.0342, -0.0597, -0.0241,
        -0.0192, -0.0052, -0.0249, -0.3258, -0.2004,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6808,  2.5893,  0.6997, -0.8932, -0.0043,  0.2884, -0.5524,  0.0794,
        -0.1193, -0.1351,  0.0414,  0.2004,  0.2531, -0.5671,  0.3666,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1224, -5.3944, -0.1097,  0.3266,  0.4883,  0.2811, -0.0778,  0.0602,
        -0.3740,  0.1382,  0.2072, -0.2305, -0.1288,  0.2664,  0.3767, -0.5445,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9425e-01, -3.0934e+00,  7.1997e-02,  1.0730e-01, -4.9291e-02,
         8.2829e-02,  7.5053e-02,  9.5193e-02, -2.9321e-02, -6.0768e-02,
         1.4591e-02,  4.6041e-02, -1.7114e-01, -1.4865e-01,  4.5029e-02,
        -1.5535e-01, -1.6713e-04,  1.1007e-02,  3.0567e-02, -6.8206e-02,
         1.0321e-01, -1.1374e-01, -1.3394e-01,  8.6383e-03,  2.2319e-02,
         1.5611e-02, -3.3142e-02, -4.2239e-02,  8.0645e-03,  2.3093e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8924e+00,  7.7646e+00, -1.8176e-01,  1.9986e-01,  2.7933e-03,
         1.2013e-01, -2.0230e-02, -1.2595e-01, -3.3878e-01, -5.5762e-01,
        -2.8255e-01, -3.6691e-01, -4.7844e-03, -1.5535e-01,  2.1817e-02,
        -1.3836e-01, -7.8134e-02,  1.5803e-01,  8.7640e-02, -1.5485e-01,
         6.0324e-02, -4.3697e-02, -1.3021e-01,  6.5629e-03, -1.1906e-01,
         8.7541e-02, -4.6604e-03,  3.1606e-02,  2.6718e-01,  1.1477e-01,
         7.0169e-02,  2.3848e-01,  2.3239e-02, -9.0402e-02,  3.2464e-01,
        -2.5905e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1281e-01,  2.9903e+00, -8.4617e-02,  1.7675e-01, -7.7742e-02,
        -9.8479e-02, -5.0210e-01, -2.2427e-01, -7.0437e-02,  3.5953e-02,
        -3.2761e-02, -2.1603e-02, -7.8932e-02,  4.9049e-02, -4.6665e-02,
        -9.9972e-03, -8.6285e-02,  1.5901e-02, -1.4877e-02, -3.5109e-02,
        -9.0469e-03, -1.7295e-03, -1.1034e-02,  8.4347e-02,  1.8561e-01,
         7.2713e-02, -1.1163e-02,  4.1486e-03,  4.5235e-02, -4.8782e-02,
        -3.0258e-03,  1.0095e-01, -1.8299e-02, -4.3074e-01, -2.7626e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6409,  3.6190,  0.3036,  1.4084,  0.8048,  0.0784, -0.2480,  0.2966,
         0.6723, -0.5751,  1.6514,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1614, -2.0762, -0.8247, -0.5072, -0.2573,  0.2396,  0.0369, -0.0522,
         0.0337,  0.0986,  0.0583,  0.0330, -0.0441, -0.1098,  0.1044,  0.0838,
         0.0546, -0.0230,  0.0817,  0.1306,  0.0709,  0.2213,  0.0163, -0.1099,
         0.2534,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1138,  2.1172,  0.1244, -0.0242,  0.0068, -0.2283, -0.1090,  0.1000,
        -0.0304,  0.1108,  0.1160,  0.1623,  0.0656,  0.0561,  0.0343, -0.0237,
        -0.0350, -0.0570, -0.0321,  0.2815, -0.0819, -0.1081,  0.0474,  0.0698,
         0.3858,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8647e-01,  2.5842e+00,  3.5452e-01,  1.2177e-01,  5.8816e-02,
         5.5657e-02,  1.0384e-01,  7.7678e-03, -3.3096e-03,  7.0174e-02,
         4.9857e-04, -5.4041e-03, -2.9426e-02,  8.7290e-03, -1.1459e-02,
        -4.0908e-03, -2.7387e-03, -3.7729e-02,  7.2091e-02, -6.5199e-02,
         2.9243e-02, -4.3391e-02, -4.0907e-02, -2.3370e-02,  2.4408e-02,
         8.1519e-02, -1.9763e-01, -5.1759e-02,  1.8008e-02, -7.9856e-02,
        -1.2153e-01, -1.3490e-01, -3.1378e-01, -2.1681e-02, -6.2765e-02,
         3.1576e-02,  1.5873e-02,  5.2283e-02,  5.1981e-02,  1.7890e-02,
        -6.1081e-02,  3.4571e-03, -5.8095e-02,  4.2664e-02,  3.8950e-02,
         4.9330e-02,  2.5147e-03, -7.6464e-02, -4.9163e-03, -1.1458e-01,
         1.8248e-02,  1.6060e-02,  1.1979e-01,  6.6678e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.0686, -2.2049,  0.0302, -0.1402,  0.1576, -0.0419, -0.0125,  0.0138,
        -0.0245, -0.0431,  0.0553, -0.1191, -0.1678,  0.0205,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2149, -3.2119,  0.3163, -0.2309, -0.2384, -0.2144,  0.0731, -0.2696,
        -0.0825,  0.0461,  0.0984,  0.1557,  0.1937, -0.1296,  0.0874, -0.1990,
         0.2601, -0.0534,  0.2732, -0.1443,  1.0083,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6584, -4.0500, -0.1041,  0.3414,  0.1478,  1.2630,  0.0131, -0.2907,
        -0.0047,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2747, -2.8428, -0.0034, -0.0809, -0.0638,  0.0170,  0.1665,  0.4383,
         0.0125, -0.0229, -0.3353,  0.0082,  0.0712,  0.1490,  0.1730, -0.1668,
        -0.0270,  0.0174, -0.3241, -0.1378, -0.1699, -0.0130,  0.0070, -0.0060,
         0.1422,  0.0718,  0.3798, -0.0817,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6624, -2.9258, -0.4488, -0.1722,  0.1759, -0.2009, -0.0718,  0.1617,
        -0.1099,  0.1172,  0.1962, -0.0370, -0.0528, -0.0076, -0.1023, -0.1517,
        -0.1764,  0.0891,  0.1654,  0.0655, -0.5020,  0.2125,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3360, -7.2362,  1.8418, -0.5962, -0.2744, -0.2299, -0.2348, -0.0362,
         0.1498, -0.0073,  0.0561,  0.2706,  0.2142, -0.0702, -0.2129,  0.1771,
        -0.0654, -0.0777, -0.0551, -0.2110,  0.0396, -0.0112, -0.0747,  0.0339,
        -0.0930, -0.0398,  0.0410, -0.0285, -0.0681,  0.0240,  0.6267, -1.4540,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6176e-01, -2.9232e+00,  2.7107e-02,  2.2175e-01,  1.0754e-01,
        -7.6168e-02, -4.7777e-03,  1.7034e-01,  1.0203e-01,  4.3786e-02,
        -1.1597e-01, -8.8681e-02,  4.1875e-02, -3.6308e-02, -1.5648e-01,
        -1.4392e-01, -9.5657e-02, -7.0774e-02,  7.3662e-03,  9.7029e-02,
        -2.7823e-02, -2.0480e-02, -5.0561e-02,  3.2319e-04,  4.8145e-02,
        -2.4682e-02, -2.6091e-02, -3.6081e-02, -4.4553e-02,  4.3907e-03,
         3.2370e-01,  6.0005e-03,  9.1805e-02, -2.1678e-02, -1.3198e-01,
        -2.2854e-02,  2.5580e-01,  6.9298e-02,  9.2883e-03,  1.9550e-02,
         6.3655e-02, -4.9678e-02,  2.2005e-03,  2.4407e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4527, -4.1048, -0.0839, -0.0226,  0.0518,  0.2649,  0.2096,  0.1267,
        -0.1881,  0.3502,  0.0086,  0.2154,  0.0048,  0.1782, -0.0204, -0.0673,
        -0.3319,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4357e-01,  5.2236e+00, -2.0597e-01,  7.8161e-02,  2.6652e-01,
        -1.5869e-01, -7.3660e-02,  2.6980e-03, -1.3362e-01,  7.6042e-02,
        -6.2920e-02, -7.8420e-02, -1.3139e-01, -2.5886e-03,  6.6140e-02,
         9.1087e-02,  8.3281e-02,  2.0142e-02, -1.8838e-01, -1.7672e-01,
         4.3293e-02,  1.5709e-01,  1.1458e-01,  2.5818e-01, -5.6695e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0731e-01, -3.4315e+00, -3.4360e-01, -1.0398e-01, -1.4750e-01,
        -1.0218e-01, -1.8705e-02, -1.7309e-01,  6.2043e-02, -2.0576e-01,
         2.0405e-01, -1.4552e-01, -1.8001e-01, -1.2791e-02, -4.5032e-02,
         3.3957e-03, -2.1841e-02, -2.9208e-02, -2.3847e-01, -3.6636e-01,
         1.6164e-01,  5.3388e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2503, -1.9793,  0.2490, -0.1427, -0.0124, -0.5919, -0.0228, -0.3778,
         0.3650,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3672e-01, -2.7233e+00,  1.8315e-01, -2.5127e-01,  2.8774e-02,
         9.0438e-02,  1.7089e-01,  5.3144e-02, -2.0990e-02, -1.1883e-02,
         4.4845e-02, -7.4910e-02,  1.6453e-01, -1.3479e-02,  1.0350e-01,
         2.1121e-01,  1.7341e-02, -2.7608e-01,  1.0938e-01,  5.4730e-02,
         2.2362e-02,  8.4217e-02,  8.0085e-02, -6.8162e-02,  5.8882e-02,
         2.5664e-01,  5.0258e-02, -3.3531e-02, -1.4277e-01,  3.7728e-02,
         2.5669e-03,  7.7348e-02, -7.8516e-02, -1.3387e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.8359e-02,  3.4098e+00, -3.8074e-01, -2.6464e-01,  8.4385e-02,
         7.5528e-02, -2.7676e-01,  2.8600e-02, -3.6685e-02,  3.4041e-02,
         2.8759e-02, -3.5745e-02,  7.9839e-02, -3.4938e-02, -4.9345e-02,
        -2.2894e-01, -1.3645e-01,  6.7848e-02,  1.5569e-01, -1.2237e-02,
         7.2917e-02,  3.2830e-02, -1.1064e-01, -2.1333e-02,  1.2143e-01,
         4.5598e-02, -1.0093e-03,  4.3718e-02,  2.5681e-02,  7.2951e-02,
        -1.0535e-01,  6.8157e-02, -6.1962e-02, -4.3278e-02, -1.1500e-01,
         5.6351e-02,  4.5286e-02, -6.8117e-02, -4.4762e-02, -1.2724e-02,
        -4.4372e-02, -1.6038e-02, -3.7258e-01,  4.7467e-02,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8064, 10.3546, -0.4226, -0.8948,  0.0446, -0.1230, -0.1399, -0.2242,
         0.0296,  0.0817,  0.2793, -0.1562,  0.1728,  0.0998, -0.0234, -0.3281,
        -0.4198,  0.2742, -0.0271, -0.3365, -0.1308,  0.2629, -0.1615,  0.5540,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0720, -4.6459, -1.1605,  0.5656,  0.1886,  0.2661,  0.0086,  0.0232,
        -0.0480,  0.5177,  0.3185,  0.1781,  0.4003,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0111,  4.4375, -0.0646, -0.7247,  0.0558, -0.3467, -0.1196, -0.3149,
        -0.2542, -0.5021, -0.1198,  0.5395,  0.2184,  0.0425, -0.2482, -0.1990,
        -0.1928, -0.0341, -0.0589,  0.0418, -0.9317, -1.3264,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7810e-01,  3.6070e+00,  7.6853e-02,  1.4805e-01, -2.3053e-02,
        -3.3765e-03, -1.1588e-01, -5.5199e-02, -7.0973e-02, -3.7042e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3523, -3.4414,  1.1738, -0.0401, -0.4497,  0.1559,  0.2531, -0.0204,
        -0.0567, -0.1801,  0.2799, -0.0065,  0.0947, -0.1328, -0.0716, -0.0304,
        -0.2593, -0.0519, -0.1985, -0.0819, -0.2348, -0.8720,  0.6882,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6907e-01, -2.7966e+00, -3.9214e-01, -1.7831e-01,  3.1574e-02,
         7.2792e-02,  1.5504e-01, -2.2159e-01, -2.2850e-02,  9.1303e-02,
        -7.5496e-02, -5.2957e-03,  6.3364e-02,  8.1578e-02, -4.3895e-04,
         7.0074e-02,  4.6195e-02,  1.5794e-01,  8.8593e-02,  2.3808e-01,
         4.6938e-02,  7.0824e-02,  2.6315e-02,  6.7893e-02,  8.9369e-02,
        -4.0518e-02,  1.4096e-02,  1.0203e-01,  3.5156e-04,  7.7968e-02,
         1.7495e-01,  1.4582e-01,  1.5688e-01, -4.3169e-02,  1.5680e-01,
         6.1883e-02,  3.0485e-02,  4.9935e-03, -4.3396e-02, -6.8423e-02,
        -1.0310e-01, -3.4990e-02, -1.1235e-01, -2.3269e-01, -6.3667e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7299,  4.4295, -0.1150, -0.2561, -0.0074, -0.0607,  0.2959, -0.2453,
        -0.0393, -0.0616, -0.2053, -0.0503,  0.0673, -0.1228, -0.0734, -0.0906,
        -0.1450,  0.0694,  0.0234, -0.2523,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1042, -2.5634,  0.2375, -0.0329,  0.0663,  0.0814,  0.1307,  0.1729,
         0.2470,  0.1371,  0.1022,  0.1840,  0.1081, -0.0450,  0.1019,  0.1388,
         0.3268, -0.0294, -0.1445, -0.0544,  0.0527, -0.1330,  0.1429,  0.1445,
         0.0340, -0.0493,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0193e+00, -7.4536e+00, -6.1456e-01,  8.6326e-02,  4.6646e-01,
        -1.0708e-01, -7.8612e-02, -7.3244e-02, -4.6973e-01, -1.0713e-01,
        -1.5212e-01,  1.1110e-01, -5.2212e-02, -1.8668e-01, -1.2870e-02,
        -6.0010e-02,  5.0759e-02,  1.2025e-01, -7.5469e-02, -4.8369e-02,
        -4.8925e-02,  6.9518e-04, -1.5932e-01,  7.1834e-02,  1.0079e-01,
         6.9204e-02,  1.2862e-01, -2.8060e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7270, -4.4734, -0.3839,  0.1034, -0.3678, -0.0656, -0.0474,  0.3574,
        -0.0803,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5404e-01, -3.8577e+00,  2.2406e-01,  8.1632e-01, -1.7692e-01,
        -2.0556e-01,  3.5925e-02,  1.9071e-01,  4.5225e-02, -3.0142e-01,
        -9.0408e-02, -4.8448e-02, -1.7947e-02, -9.8767e-02, -4.9005e-01,
         1.0841e-01, -1.0404e-02, -6.0872e-02,  2.1322e-01, -7.8177e-02,
        -5.0352e-02,  2.3573e-01,  4.0714e-02,  6.7683e-02, -1.2865e-01,
        -3.7643e-03,  1.6688e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.5856e-01,  6.8139e+00,  3.2526e-01, -4.8682e-01,  1.3468e-01,
        -1.9265e-01, -7.2557e-02,  1.3512e-01,  5.2465e-02, -3.5803e-02,
        -9.7045e-02,  3.4706e-01,  4.5578e-02,  8.6945e-02,  1.0779e-02,
        -1.3149e-02, -4.1555e-02,  4.0198e-02,  2.6010e-03, -1.3746e-01,
         3.5371e-02, -7.7567e-02,  8.3442e-02,  1.8195e-01, -1.5066e-02,
        -1.0417e-01,  9.9202e-02,  2.1499e-02,  4.7831e-02,  2.3741e-01,
         1.1296e-02,  7.5853e-02, -4.3567e-03,  1.4399e-02, -7.2645e-02,
        -9.5105e-03, -6.5566e-02, -6.5426e-02,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4971e+00,  6.1056e+00,  1.7216e+00,  2.9086e-01, -1.8786e-01,
        -6.3970e-02, -2.3495e-01,  4.9591e-02,  2.0965e-01,  1.2435e-01,
         1.0626e-01,  1.8879e-01, -5.8025e-02, -1.3761e-01,  2.1218e-01,
         1.3057e-01, -2.5118e-02,  2.1660e-03, -3.4955e-01,  2.4165e-02,
        -1.8115e-01,  2.3138e-01,  2.2602e-02, -1.9450e-01, -1.5832e-01,
         6.0351e-02,  1.4484e-01,  1.3358e-01, -9.9650e-03,  3.6195e-01,
        -2.9947e-01,  1.5861e-01,  1.5410e-02, -1.0408e-01, -4.6952e-02,
         1.4473e-01,  9.2782e-01, -2.1274e-01,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9720e-01, -1.0113e+01, -6.2885e-01,  5.6270e-03,  3.1800e-01,
        -2.6610e-03, -1.0395e-01,  2.2122e-01,  3.1186e-01,  7.4752e-02,
         3.1514e-02,  3.5108e-02, -9.9516e-01,  8.1752e-02,  1.4550e-01,
        -1.6947e-01,  5.8084e-02,  7.6108e-02, -2.3760e-01, -3.2184e-02,
        -1.1055e-01, -4.5318e-02,  4.0366e-01, -7.7391e-03,  2.5907e-02,
         1.4342e-01,  1.0077e-01, -2.9737e-01,  1.8707e-01, -6.4241e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2845e-01, -4.9297e+00,  2.0972e+00, -9.2522e-02, -1.8695e-01,
         1.0249e-01,  1.7685e-02,  3.1795e-01,  6.9179e-04,  1.0176e-01,
         3.8125e-02, -5.6600e-02,  1.7734e-02, -8.9336e-02,  5.3763e-03,
         1.2275e-01, -3.9355e-02,  2.9834e-02,  1.7991e-03, -1.3270e-02,
         7.2490e-02,  2.4562e-02,  3.5778e-02,  2.5594e-02,  1.2286e-01,
        -3.4172e-02, -7.8615e-02, -8.1663e-02, -6.6764e-02,  8.1029e-02,
        -3.5510e-02,  9.7070e-02,  5.2117e-02, -1.2582e-02,  2.0724e-02,
        -1.6135e-02,  6.0395e-02,  4.9994e-02, -2.6813e-01, -2.2428e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0201e-03, -3.0913e+00,  8.1607e-02,  6.3898e-02, -2.2006e-01,
         8.7827e-02,  2.4257e-02, -1.5095e-01, -1.0274e-01,  1.3449e-02,
        -3.2809e-01,  5.9580e-02,  1.2952e-01, -1.3831e-01,  4.8949e-02,
         1.4412e-02,  2.6331e-02, -6.5954e-02, -2.8614e-01,  2.5666e-01,
        -2.1412e-01, -2.0277e-01,  2.4001e-02,  9.6372e-02,  3.8098e-02,
         1.6476e-02, -2.4060e-02, -3.0298e-02, -2.9309e-02,  2.5948e-02,
         9.1287e-02,  4.6223e-02,  9.0901e-02,  1.0539e-01,  2.9374e-01,
         8.7915e-02, -1.4292e-01, -2.5999e-01,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0455,  3.3412, -0.0321,  0.2769, -0.0109,  0.0776, -0.1182, -0.3286,
        -0.1281, -0.3397,  0.0069, -0.1956,  0.0544, -0.1524, -0.1704,  0.0416,
         0.0541, -0.0275, -0.0690, -0.0239,  0.0985, -0.4838, -0.0771,  0.1136,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8040e+00, -5.6956e+00, -5.4311e-01,  1.7945e-02, -2.5359e-01,
         2.4037e-01, -2.9355e-02,  2.1470e-02,  1.1504e-01, -4.3923e-02,
        -9.2726e-02, -9.7501e-02, -8.5066e-02,  2.4390e-01,  5.0721e-02,
         6.1220e-02, -5.0491e-04, -1.4316e-02,  2.0767e-02,  3.4952e-02,
        -5.5773e-02, -1.4737e-01, -4.7188e-02, -5.7554e-02, -6.9339e-02,
         5.0232e-02, -1.7156e-01,  1.0318e-01,  1.3316e-01, -8.3076e-02,
         2.8017e-02, -3.0342e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2113, -3.4526, -0.2205,  0.1118, -0.0750, -0.1452, -0.8874,  0.2115,
         0.3653,  0.0250, -0.6343,  0.0515,  0.2230,  0.1563,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6569e-01,  5.6669e+00,  2.9028e-01, -4.0274e-01, -3.5853e-01,
        -3.0870e-01,  3.8846e-03, -4.9817e-01, -3.0443e-01, -6.2501e-02,
         1.8427e-01, -5.1821e-02,  7.9821e-02, -1.5904e-01, -4.3817e-02,
        -1.1152e-01, -2.7139e-02,  2.6274e-01, -4.3040e-02, -2.9223e-01,
         1.7659e-01, -2.0335e-01, -2.5562e-01, -1.2933e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1450, -2.4442, -0.0088, -0.0803,  0.0980,  0.2974, -0.0391, -0.0052,
        -0.0668, -0.0046, -0.0405,  0.0844, -0.0056,  0.0458, -0.0057, -0.0799,
        -0.0414, -0.0325,  0.0369, -0.2010,  0.2489,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3041, -4.9855,  0.0883,  0.0111, -0.3797,  0.0082, -0.3002, -0.0244,
        -0.1337,  0.0161, -0.0727,  0.4783, -0.0359, -0.3559,  0.0736, -0.0291,
        -0.0571,  0.1764,  0.0155, -0.2573,  0.0554, -0.3242, -0.0056,  0.1538,
         0.0682, -0.1126, -1.4770,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2741,  5.6736, -0.5771,  0.2545, -0.2339, -0.0312, -0.1740, -0.0481,
        -0.7962, -0.0786, -0.1126, -0.4654, -0.0966, -0.1146,  0.2406,  0.1692,
         0.1007,  0.2520,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.7870e-01,  1.1283e+01, -1.1826e+00,  3.0549e-01, -3.0335e-01,
         3.6241e-01,  1.3752e-01, -1.4604e-01, -9.3671e-02, -3.9047e-02,
         3.8732e-01,  1.7609e-01, -3.4689e-02, -1.0667e-01,  7.4706e-02,
         1.9391e-01,  1.1289e-01, -3.5505e-02, -9.8608e-03,  1.4877e-01,
         1.0745e-01, -3.7134e-01, -1.3241e-02,  3.6681e-02, -1.2922e-01,
        -1.9798e-01,  1.4940e-02,  6.2773e-02,  3.4388e-02,  2.0079e-02,
        -1.5568e-01, -2.9663e-01, -2.8760e-01, -2.6814e-02, -1.6259e-02,
        -1.0296e-02, -9.6901e-02, -4.7009e-03,  2.0188e-01,  1.2766e-01,
        -1.2395e-02, -4.9362e-02, -4.5544e-02,  9.3637e-02,  8.3169e-01,
         1.3381e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2114e-01,  3.5645e+00,  2.8555e-01, -3.4954e-01, -3.2878e-01,
        -5.2284e-02, -1.6712e-01, -7.0535e-02,  3.9816e-02, -9.3449e-02,
        -3.2899e-02, -1.8063e-01, -6.3946e-02, -8.3409e-02, -2.1883e-02,
        -8.9882e-02, -8.0439e-02, -7.1825e-02,  3.0452e-02, -3.1861e-01,
        -1.6882e-01,  4.1997e-01,  1.2332e-01,  4.3952e-04,  1.0589e-01,
         5.8868e-02, -6.4971e-02,  6.5545e-02, -2.3034e-02,  1.7977e-02,
         1.2357e-01, -2.4796e-01,  5.5548e-02,  6.4355e-02,  4.0573e-02,
        -9.1207e-02,  2.3221e-01,  3.6233e-01, -3.3317e-02, -5.8205e-02,
         2.3239e-02, -8.0138e-03, -1.0013e-01, -1.3345e-02,  2.2271e-01,
        -2.4015e-01, -4.1288e-02,  4.2855e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1188,  5.4686,  0.1076, -0.2349, -0.0197, -0.1165,  0.0201, -0.1574,
        -0.2627, -0.1334,  0.3814,  0.1358,  0.0842,  0.1167,  0.0274, -0.0097,
        -0.0599, -0.2427, -0.0159, -0.0474, -0.1320,  0.0479,  0.3831, -0.0604,
         0.0189,  0.0386,  0.0414,  0.0091, -0.2831,  0.0908, -0.2170, -0.0059,
         0.8331,  0.3399,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3509e-03,  1.0843e+01, -1.6451e-01,  7.9981e-01,  1.4511e+00,
         3.2549e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7060e-02,  4.5527e+00, -1.2785e+00, -1.4379e-01, -2.2679e-01,
        -2.6760e-02,  5.1110e-02,  8.9735e-02, -1.6291e-01, -4.3667e-02,
        -1.2218e-01,  6.9508e-02, -7.5105e-02,  1.9535e-01,  4.8577e-02,
        -7.5240e-03,  1.9806e-02, -9.8177e-02, -5.0035e-02,  2.1997e-01,
         1.3298e-01, -7.7305e-02, -7.8444e-02, -5.9000e-02,  8.0068e-02,
         2.7349e-02, -1.5503e-03,  7.8371e-02,  1.9333e-02, -6.4716e-02,
        -7.2529e-02,  1.7853e-03, -5.3853e-03, -3.2485e-02, -6.1120e-02,
        -4.9692e-02,  7.4388e-03, -7.3806e-02,  5.2644e-03, -4.1889e-02,
         1.0302e-01, -1.0670e-02,  7.0332e-02, -2.7464e-03,  7.0954e-01,
        -7.1933e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9198e-02, -1.0598e+00,  2.2324e-01, -3.5488e-02, -2.1792e-03,
        -1.4032e-02, -4.8181e-02, -2.1352e-02, -5.4331e-02, -3.3960e-02,
        -5.7000e-02,  2.3521e-02, -9.7500e-04, -5.8967e-02, -2.6067e-02,
         6.2068e-02,  1.9046e-01,  7.6979e-02,  2.4942e-02, -6.1984e-02,
        -1.2279e-01, -7.8704e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1366, -4.0969, -0.3413,  0.1446, -0.4211,  0.0368,  0.1419, -0.0563,
         0.0764,  0.3536,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4615, -7.6220,  0.4192, -0.4066, -0.0131,  0.0719,  0.1542, -0.4568,
        -0.4148,  0.1878,  0.0911, -0.1452,  0.0985,  0.1641,  0.4336,  0.5426,
         0.5307, -0.4854,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0098, -2.5024, -0.2069, -0.2175,  0.1957, -0.1307, -0.3392,  0.0321,
         0.0030, -0.0389,  0.1850,  0.0672, -0.0056,  0.0842, -0.0388, -0.0500,
         0.0028,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0184, -4.9243, -0.1913,  0.1404, -0.3529,  0.4141,  0.0457, -0.0408,
         0.1567,  0.4461, -0.2081,  0.4233,  0.2916,  0.3760, -0.1256, -1.1523,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9920,  6.3901, -0.8209,  0.0666, -0.1315,  0.2060,  0.2686,  0.0661,
         0.0140,  0.0505, -0.2363,  0.0516,  0.1721,  0.0646, -0.0962,  0.2110,
        -0.7061,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1291, -7.9925, -0.9201, -0.0244,  0.0957, -0.1598, -0.0276,  0.2733,
         0.0472,  1.4000,  0.0419,  0.0416, -0.0999,  0.3621, -0.2018,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.7193, -8.0074,  0.2077,  0.1504,  0.4873, -0.1759,  0.1704,  0.0979,
         0.5527,  0.3050, -0.3807, -0.3045, -0.0106,  0.3484,  2.7511,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5048e-01, -3.0750e+00,  8.8395e-03, -1.4449e-03, -9.6934e-02,
         4.2425e-02,  3.7046e-01,  4.0045e-02,  8.6642e-02,  1.1544e-01,
         3.6299e-02,  2.7704e-01, -6.8632e-02,  2.9637e-01, -1.3182e-01,
         6.4529e-02, -8.7516e-02,  3.4780e-01, -1.8178e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3165,  6.8949,  0.1679, -0.0936, -0.0875, -0.3205, -0.1948, -0.1879,
        -0.4831,  0.2641,  0.3914,  0.1385,  0.1701, -0.0746, -0.1510,  0.3198,
         0.2113,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3789e-01, -5.4762e+00, -2.5659e-01, -1.1336e-01,  2.6750e-01,
         1.3865e-01,  1.3711e-01,  2.9322e-01, -2.4822e-02,  2.5835e-02,
        -1.0763e-01,  1.2145e-01, -8.2772e-02,  3.3049e-02,  5.4518e-02,
         1.4745e-01,  5.7604e-01,  6.7848e-02,  4.3206e-02,  2.5029e-02,
         6.8108e-02, -1.9613e-02,  2.6021e-01, -1.8960e-02, -1.6222e-02,
         5.0239e-02, -8.1387e-02,  4.7269e-02, -5.0246e-02,  6.6417e-02,
        -8.0638e-02,  4.7323e-02,  1.1955e-01, -6.6107e-02, -1.1654e-01,
         8.1090e-02, -2.7093e-02,  3.6965e-03,  5.8601e-02, -4.4953e-02,
        -1.6014e-01,  6.3867e-02,  6.0324e-02, -4.5807e-02, -6.1134e-02,
        -6.1796e-03, -2.8486e-02,  5.9485e-02, -1.1478e-01, -3.2246e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6928,  6.2857,  0.8188, -0.2268,  0.1523, -0.0947, -0.3281,  0.1798,
         0.3074, -0.1335, -0.5032, -0.1262,  0.0179,  0.2603,  0.2251, -0.1556,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0761, -12.4110,  -0.9096,  -0.4833,  -0.3910,  -0.2869,   0.1548,
         -0.3094,   0.0202,   0.2156,   0.3223,  -0.0815,  -0.0571,   0.1379,
          0.2557,   0.2581,  -0.8699,  -0.1453,  -0.2612,  -0.4013,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7126e-01, -3.4287e+00, -8.0973e-01,  1.0051e-01,  7.5581e-02,
         8.1200e-02, -8.0398e-02, -2.5491e-01, -1.5680e-01, -9.3885e-02,
         1.8656e-01,  1.3843e-01, -1.1558e-01, -8.3997e-04,  2.8891e-02,
        -2.8998e-02, -6.3003e-02,  9.2796e-03, -6.4793e-03,  1.3286e-01,
         8.3120e-02, -4.4292e-02, -1.1045e-02, -2.7728e-03,  3.6887e-02,
         6.1275e-01,  2.8172e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1261, -2.0269, -0.0086, -0.0751, -0.2421, -0.0598,  0.1379,  0.1631,
         0.0591, -0.1099, -0.0480,  0.0268,  0.0711, -0.2878,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8198, -3.2191,  0.0133, -0.4140, -0.0285, -0.0383, -0.0603,  0.0372,
         0.2363,  0.1657, -0.2426, -0.0672, -0.3465, -0.3947,  0.6166,  0.1469,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9057e-01,  2.7898e+00, -2.1810e-01, -2.4354e-01,  2.0972e-01,
         1.2714e-01,  3.0154e-02, -1.8793e-02,  4.2414e-03,  4.9831e-02,
         5.7263e-02, -4.1830e-03,  2.0825e-02,  7.0293e-02,  2.8015e-02,
         3.5938e-02,  1.0012e-01, -3.9327e-02, -1.2423e-01, -1.1593e-02,
        -6.9314e-04,  1.2455e-01,  1.2375e-02, -2.6335e-02, -3.4595e-02,
         8.6222e-02, -3.4682e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4778e-02,  6.1621e+00,  5.2394e-01, -3.6777e-01,  2.7452e-01,
         1.4005e-02,  1.3896e-01,  2.1017e-01,  1.4802e+00,  1.6849e-01,
         9.6745e-02,  2.0268e-01,  4.4076e-02,  2.9871e-01,  5.7432e-02,
         6.5542e-01,  5.6987e-01,  4.4471e-03,  2.1583e-01,  7.4273e-01,
        -2.0442e-01, -1.7097e-02,  1.6284e-01,  1.7907e-01,  3.0324e-01,
        -2.9052e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5360e-01, -3.7919e+00, -1.5011e-01,  5.5129e-02, -1.4119e-01,
         1.2959e-01,  2.8054e-02, -5.8418e-03, -2.4035e-02,  8.0734e-02,
        -1.0770e-01,  6.8631e-02, -1.1159e-01,  7.7092e-02,  5.2127e-02,
        -6.0037e-02,  3.9787e-02, -7.5550e-02, -2.5248e-03,  4.8803e-02,
         9.1349e-02,  2.1514e-01, -1.2924e-01,  2.6704e-02,  8.6674e-02,
         4.3529e-02, -1.3499e-01, -7.9627e-02, -1.1001e-01,  4.2701e-01,
        -1.7310e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.2008e-01, -3.7182e+00,  2.4208e-01,  1.5224e-01,  9.6709e-02,
         7.6204e-02,  3.7968e-01, -4.4167e-02,  1.9174e-01, -5.4928e-05,
         1.4326e-01, -2.0733e-01, -5.7671e-02,  8.4334e-02,  5.5065e-02,
        -1.9623e-01,  6.2442e-03,  8.1930e-03, -1.1120e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4223,  7.8125,  0.9950, -0.2013, -0.4153,  0.0456, -0.0624,  1.2347,
         0.0443,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7073, -6.5340, -0.6572,  0.5908,  0.4441, -0.1989,  0.2537,  0.0816,
         0.0433,  0.3573, -0.0794, -0.0179,  0.1377,  0.0723,  0.2192, -0.2205,
        -0.0862,  0.1281,  0.0775,  0.1939, -0.3473,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2770,  9.8104,  0.1979,  0.2983, -0.1702, -0.2338, -0.4485, -0.0531,
        -0.3909,  0.1561,  0.0131, -0.2505, -0.2075,  0.4313,  0.0231, -0.0766,
        -0.4411, -0.2115,  0.0497, -0.0807, -0.0512, -0.0359,  0.1534, -0.3880,
         0.1112,  0.4536,  0.4467,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6783,  6.4413,  0.3313,  0.0529, -0.0599, -0.0903, -0.3382,  0.5139,
        -0.0414, -0.2301,  0.1233,  0.1635, -0.0463,  0.0153, -0.2875,  0.0340,
         0.1590,  0.0717,  0.0510,  0.0754, -0.1006, -0.0529, -0.1022, -0.0300,
         0.2154,  0.0138, -0.1736, -0.0702, -0.0851,  0.3204,  0.0907, -0.0512,
         0.0506, -0.1407, -0.0441, -0.0777, -0.0351,  0.2234,  0.4105,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9406, -7.1274,  0.1567,  0.2234,  0.3127,  0.0157, -0.1208, -0.0613,
        -0.6393, -1.3340, -0.2020, -0.2979,  0.0500,  0.1981, -0.0870, -0.4829,
        -0.1133,  0.3857, -0.3316,  0.4618,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2821e-01, -3.8791e+00, -4.1590e-01,  1.9714e-01, -4.1805e-01,
        -1.0272e-01,  1.0613e-01,  7.6998e-01,  1.5552e-01, -1.5751e-01,
        -2.0401e-01, -4.1418e-02, -1.6793e-01, -1.7301e-01, -3.2807e-01,
        -2.4140e-02,  2.5179e-01,  3.3872e-02, -5.9155e-02,  5.8362e-04,
         3.3303e-01,  4.3011e-01, -1.2037e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0826, -1.5102,  0.4555,  0.0765, -0.0074, -0.1064, -0.0650, -0.0761,
        -0.0219,  0.0555, -0.0643, -0.0429, -0.0158, -0.0287, -0.0248, -0.1789,
        -0.0619,  0.0693,  0.0061, -0.0030,  0.0536,  0.0200,  0.0403, -0.1021,
         0.1310,  0.0441,  0.0517,  0.0924,  0.0151,  0.1125,  0.0303,  0.0395,
        -0.0196,  0.0289,  0.0743, -0.0090,  0.1339,  0.0428, -0.0557,  0.1030],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1901, -6.4164, -0.1421, -0.3488, -0.2033,  0.1247,  0.2491,  0.1904,
        -0.0426, -0.1602,  0.1407,  0.1113,  0.1429,  0.0389,  0.0536,  0.0087,
         0.0074,  0.0791,  0.1008, -0.0091,  0.1474,  0.0931, -0.0546, -0.0975,
         0.0773,  0.1812,  0.2487, -0.0125, -0.0440,  0.0741, -0.0472,  0.0956,
         0.1049,  0.2039,  0.2527, -0.4587,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1359,  3.2205,  0.0385, -0.2192,  0.0186, -0.0418,  0.1070,  0.0405,
        -0.1550,  0.0318,  0.0959,  0.1088,  0.0282, -0.0545, -0.0119,  0.0593,
         0.0421, -0.2444, -0.1165,  0.0306, -0.0533,  0.0140,  0.0433,  0.1034,
         0.0430, -0.0440,  0.0123,  0.1898, -0.0673, -0.0527, -0.0775,  0.4116,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1977, -3.2718,  0.1878, -0.0821,  0.0886, -0.0658, -0.0996, -0.1589,
         0.0384,  0.2179,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3629,  1.6893, -3.3707, -0.5396, -0.5772, -1.2349, -0.7666,  0.2948,
         3.3644,  0.1371,  1.1301, -3.6669, -6.7403,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.3574,  2.0505, -0.0104, -0.1444, -0.0820,  0.2549, -0.2346,  0.0581,
        -0.0107,  0.1009,  0.2124,  0.0224, -0.2356, -0.2967, -0.3159,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2496e+00, -8.1855e+00, -7.9789e-01, -7.5581e-01, -4.2665e-01,
        -4.0105e-01, -4.4948e-02,  2.5156e-01,  5.7855e-02,  6.3611e-02,
         5.4303e-01,  1.3155e-01,  1.2572e-01,  1.6329e-01,  2.2027e-02,
         1.4004e-03,  1.2034e-02,  1.5516e-01,  4.0639e-01,  1.1515e-01,
        -1.5722e-01,  2.5612e-01, -4.1473e-01, -1.0796e-01,  7.3910e-02,
         1.0289e-01,  3.7594e-02, -1.5747e-01,  1.6333e-02, -2.2074e-01,
         2.9383e-01, -6.1579e-01, -5.8456e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.1374, -13.4526,   0.1788,   0.7190,  -0.4277,   1.9664,  -0.4293,
         -0.7681,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3196e+00, -1.0184e+01, -1.7693e+00, -1.0279e-01, -3.2410e-01,
         9.3952e-02,  5.0858e-01,  8.9341e-02,  1.4776e-01,  2.3076e-01,
        -1.4372e-01,  7.1125e-02,  5.0508e-02, -2.3812e-01, -2.3234e-03,
         6.6888e-01,  1.1204e-01,  9.4070e-02, -1.6105e-01, -1.4368e-02,
         1.8177e-01,  1.4396e-01,  1.4349e-01,  4.5652e-01,  1.4228e-01,
         2.3406e-01, -1.7379e-01,  1.1636e-01,  5.3648e-01, -1.1271e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1622, -3.3871, -0.8968, -0.5149,  0.0574, -0.1298, -0.0279,  0.0332,
         0.0482, -0.0211, -0.0047, -0.2773, -0.1475, -0.0422,  0.3251,  0.4178,
         0.5393,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2170, -1.1260,  0.1748,  0.0926, -0.0168, -0.1636, -0.0068,  0.0369,
         0.0225, -0.0139, -0.0699,  0.0036,  0.0495, -0.0083,  0.1827,  0.0239,
        -0.0542,  0.0119,  0.0231, -0.1010, -0.0643, -0.1367,  0.0595, -0.0079,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1306,  4.0729,  0.4050,  0.2280, -0.0841, -0.0294,  0.3555, -0.0875,
         0.2388, -0.0426,  0.0668,  0.1100,  0.0609, -0.0621,  0.1461, -0.0648,
        -0.3294, -0.0097, -0.0491,  0.0968, -0.0361,  0.0193,  0.0377,  0.0723,
         0.0085,  0.0845,  0.0301, -0.0439, -0.1347,  0.0656,  0.0251,  0.0418,
         0.0091, -0.0793, -0.1018,  0.0200, -0.2105,  0.0833,  0.0327,  0.0816,
         0.1532, -0.0482,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3733e-01,  4.3300e+00,  1.5212e+00,  2.1013e-01,  2.1315e-01,
        -2.9863e-01, -1.6470e-01,  4.1876e-01,  2.1806e-01, -9.0903e-02,
        -4.0857e-03,  1.0691e-01, -6.5882e-02, -1.3370e-01,  5.3349e-02,
         2.1444e-02,  1.3374e-02, -5.9741e-02, -2.8962e-02,  5.2558e-02,
        -5.7520e-02,  6.6485e-02,  6.0712e-03, -1.8964e-01,  5.2441e-03,
         9.1929e-03,  5.5398e-02,  9.1816e-02,  2.2061e-02,  1.7628e-01,
         3.0882e-02, -1.4698e-01, -3.2638e-02, -7.9413e-02, -1.3527e-01,
        -8.5843e-02, -1.2723e-02, -1.2651e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8461, -7.3205,  0.3320,  0.0185,  0.5773, -0.2553, -0.5865, -0.1425,
        -0.3396,  0.0165, -0.5538,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3788,  4.3625,  0.2449, -0.1401,  0.0542, -0.1913, -0.1364,  0.0065,
        -0.0671, -0.0266, -0.3481, -0.1185, -0.0054,  0.0282,  0.0872,  0.0260,
         0.0852,  0.0698,  0.0529, -0.2021,  0.0055,  0.0216, -0.0324,  0.0595,
        -0.0442,  0.0056,  0.0152, -0.1001,  0.1078, -0.0174,  0.0276,  0.1374,
        -0.1070,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6060e-01,  1.6972e+00,  5.1209e-03,  3.1170e-02,  4.7354e-02,
        -2.3730e-02, -2.6177e-02,  7.9575e-02, -2.1441e-02, -1.7569e-01,
        -9.9793e-02, -7.3424e-02, -4.8223e-02, -4.6162e-02,  2.8825e-02,
         4.6952e-02, -3.9344e-02, -2.9130e-02,  4.1537e-02, -5.3593e-02,
        -2.8778e-02, -4.5366e-03, -4.4721e-03,  4.2013e-02,  2.6546e-02,
         2.1211e-02,  9.6355e-03,  1.5411e-02,  1.3016e-02, -1.5661e-02,
        -1.6142e-02,  4.8046e-03, -7.7531e-02,  4.0271e-02,  4.1332e-02,
         6.5127e-02,  6.6387e-02,  5.9931e-03,  1.4094e-02,  8.0904e-03,
         1.0628e-01,  3.3091e-02, -1.3220e-03, -3.1391e-02,  2.8074e-02,
        -1.3936e-02,  2.4095e-03, -8.6846e-03, -1.7105e-02, -7.0059e-03,
        -6.3988e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0597e-01,  3.9217e+00, -8.0239e-03, -8.5087e-02,  2.6195e-02,
        -1.4952e-01, -1.9355e-02,  9.7469e-02, -1.0361e-01, -7.2140e-02,
         8.6431e-03, -2.0589e-02, -8.9378e-02, -3.8390e-02, -3.0300e-03,
        -9.8679e-03, -3.7820e-02,  1.4626e-02, -3.8062e-02, -1.0363e-02,
        -3.9644e-02, -1.7594e-01, -1.0377e-01, -2.7970e-02, -8.5108e-02,
         2.4244e-02, -1.6964e-01,  4.9319e-02, -2.9759e-02, -7.3163e-02,
         3.3504e-02, -1.4507e-02, -4.5502e-02, -1.7278e-01, -2.7722e-02,
        -1.4734e-01,  3.4023e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-2.4192e-01, -3.0346e+00,  3.0541e-01, -2.5281e-01,  4.4553e-02,
         7.8056e-03,  3.9520e-02,  1.0490e-01,  1.9641e-02,  2.3447e-03,
         1.7034e-01,  6.1378e-02, -8.9161e-02, -6.8186e-02, -4.0411e-02,
        -2.3367e-02,  5.9945e-02,  5.7235e-02, -1.3542e-01,  3.2369e-02,
        -8.0650e-02, -5.1462e-02,  4.6160e-02,  8.3277e-02, -2.5159e-02,
        -7.6215e-03, -3.3271e-02, -1.9594e-02, -8.1705e-02, -2.1329e-02,
        -2.9322e-02, -2.1923e-02,  5.6678e-02,  4.2093e-02,  8.3441e-02,
        -3.6220e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3249e-01,  4.9528e+00,  2.1789e+00,  7.4628e-01, -1.4885e-02,
        -8.2235e-02,  1.6458e-02,  2.6682e-02,  2.1305e-01, -1.4286e-01,
        -1.0332e-01, -5.8143e-03,  1.6595e-02, -9.8103e-02,  2.8754e-03,
        -1.1739e-01,  4.4452e-02,  8.5191e-03,  1.0671e-02, -1.0442e-01,
        -7.0877e-02, -9.2356e-02,  7.4929e-03, -3.4591e-02, -8.4515e-02,
        -2.2282e-02, -2.8638e-02,  4.4368e-02, -1.5044e-01,  2.7474e-02,
         2.4331e-02, -1.2912e-01,  2.2062e-02, -6.5821e-02,  2.8163e-02,
        -3.6938e-02, -4.5745e-02,  8.4836e-02, -1.2970e-02,  2.6419e-03,
        -1.7382e-02,  2.3929e-01, -1.4281e-01, -5.0680e-02,  4.9333e-02,
         6.7276e-02, -5.7283e-02, -1.0369e-01, -8.4914e-02,  7.0511e-02,
         9.3603e-02, -9.6367e-03,  1.1745e-01,  2.2231e-02,  3.0853e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1120, -2.8329, -0.3664, -0.0963, -0.1107, -0.0310,  0.0519, -0.0761,
        -0.1583, -0.0119,  0.0526,  0.0718,  0.2900,  0.1530,  0.4135,  0.1631,
         0.0388, -0.0461,  0.2703,  0.1573,  0.1794, -0.1706,  0.2629,  0.0132,
         0.0109, -0.4886,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3811, 13.1583,  2.0901, -0.5894, -0.1373, -0.3033,  0.6300,  0.1121,
         0.2779, -0.1778, -0.2557, -0.3011, -0.3673, -0.3142, -0.2184, -0.1493,
         0.6742, -0.1305, -0.5561,  0.4090,  0.0911, -0.2295,  0.3346,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6781, 15.2028, -0.2843, -0.6193, -0.0778, -0.5635,  0.3616, -0.1288,
        -0.2212, -0.3577,  0.0449, -0.0283,  0.1934,  0.1307,  0.2814,  0.1884,
        -0.0249, -0.2299,  0.2045,  0.1927, -0.1060, -0.0347, -0.2458, -0.3111,
         0.1731, -0.3316, -0.0311,  0.7647,  0.3102,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5428, -6.5192, -1.2266, -0.4650, -0.8474, -0.3536, -0.8215,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0409, -3.1609,  0.5073,  0.0091, -0.0561,  0.1586,  0.1567,  0.1150,
        -0.0393,  0.1133, -0.0270, -0.2829,  0.0056, -0.0634, -0.1847,  0.1034,
         0.0428, -0.4697,  0.0796,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5026, -8.6218, -0.6190, -0.6069, -0.4505,  0.4222,  0.1156,  0.5365,
        -0.6420, -0.1537, -0.0562, -0.0596,  0.2388, -0.5905, -0.3807,  0.0448,
         0.1194,  0.0162, -0.0274, -0.0107, -0.0777,  0.1764,  0.0388,  0.1685,
         0.0412, -0.0314,  0.6741, -0.1777,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5573, -3.5296,  0.0222, -0.3265,  0.0507,  0.0852,  0.1386,  0.0341,
        -0.2166,  0.0565,  0.0604,  0.0370,  0.0158,  0.1310,  0.1255,  0.0795,
        -0.0621,  0.1885,  0.2329,  0.0288,  0.0887,  0.0987,  0.0234,  0.1462,
         0.1823,  0.0244,  0.0254,  0.0403,  0.3038, -0.2125, -0.2437,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0677, -4.2133, -0.2271, -0.2714, -0.2186,  0.2684, -0.3082,  0.8217,
        -0.0350, -0.1262,  0.0682,  0.1764,  0.1359, -0.4595,  1.7509,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1934e+00,  6.3548e+00, -4.0695e-01,  1.1180e-01,  8.6881e-02,
         3.1497e-01, -4.3418e-01, -6.3323e-01,  1.7898e-01, -5.5809e-02,
         1.5300e-01, -2.3139e-03, -1.0893e-01,  7.5771e-02,  9.6950e-02,
         2.9538e-01,  1.3807e-01,  1.0164e-01,  1.8459e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5396e-02,  9.0995e+00,  1.0504e-01, -6.0704e-01,  3.6223e-02,
        -4.7826e-03, -3.7935e-01, -3.8867e-01, -9.3762e-02,  9.9625e-02,
         1.0676e-01,  1.7013e-02,  1.4849e-01,  1.3002e-01, -1.4016e-01,
         3.4691e-01,  6.5574e-01,  1.2331e-02, -7.9558e-03, -1.9669e-01,
         2.1827e-01,  3.1853e-02,  1.2849e-01, -2.8143e-02, -1.1948e-02,
         8.1939e-02, -1.6397e-01,  1.0140e-01, -4.0180e-01,  4.3598e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.2051, -4.0254,  0.3186,  0.1154,  0.1419,  0.1841,  0.4912,  0.0512,
         0.4998,  0.3574,  0.4243,  0.0681,  0.1135, -0.0595, -0.0732, -0.1018,
         0.1020,  0.0970, -0.0082,  0.0784,  0.5779, -0.1134,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4097e-02, -6.8209e+00, -1.4311e-01, -5.7181e-03, -9.5433e-02,
        -5.7169e-02,  9.0144e-02, -2.6883e-02, -5.6211e-02,  1.5530e-02,
         4.9940e-01,  1.7538e-01,  2.0644e-01,  4.0636e-02, -1.6467e-01,
         2.1874e-02,  2.0917e-02,  1.2077e-01, -7.4638e-02, -2.7500e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2492, 10.7879,  0.2071,  0.5146,  0.6718,  0.0548, -0.4089,  0.2010,
        -0.0459,  0.0831, -0.3062, -0.2124, -0.0797, -0.3132,  0.1589, -0.0506,
        -0.1513,  0.0243,  0.1035, -0.1186,  0.0398,  0.0295,  0.0237,  0.1244,
         0.8999,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3163, -6.8627,  0.2261, -0.0750,  0.2718,  0.1574, -0.1038,  0.0843,
        -0.0653,  0.3608, -0.3020,  0.4078,  0.1955, -0.1161, -0.0983, -0.0708,
        -0.2139,  0.1732, -0.0963,  0.1496,  0.0701, -0.1334, -0.1377,  0.0655,
        -0.0618, -0.0176,  0.0482, -0.3442, -0.1416,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0352, -4.0952,  0.5830, -0.1264,  0.0240,  0.2655, -0.3237,  0.0415,
        -0.0586,  0.0694, -1.0787,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3301e-01, -4.1612e+00,  1.3306e-01,  2.5742e-03,  8.0620e-02,
        -4.0401e-02,  1.9330e-01, -1.5297e-01, -2.4326e-01, -1.0302e-01,
         4.6630e-02, -1.9417e-01, -1.2099e-01, -2.2195e-01, -6.2220e-01,
        -3.8985e-02, -1.7950e-02, -8.9033e-02, -2.1868e-01,  6.6594e-02,
        -1.5819e-01, -5.2714e-02, -4.0512e-01,  2.8984e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6558e-01,  4.9361e+00,  4.0878e-01, -4.5903e-02,  3.8055e-02,
        -2.8371e-02,  1.3857e-01,  1.0198e-01, -4.7256e-02,  8.9769e-02,
         7.9899e-02,  1.0858e-01,  1.2327e-01,  4.1033e-03,  7.3260e-02,
        -3.5227e-02, -4.6850e-02,  4.9085e-04,  3.1857e-02,  2.8315e-02,
         4.7458e-02, -5.7205e-02, -3.9489e-02,  2.2393e-01,  1.1200e-01,
        -3.2205e-02,  1.1068e-01,  9.8628e-02,  1.6242e-02, -5.6617e-02,
        -6.0561e-03, -8.1070e-02, -2.9935e-02, -7.4237e-02,  2.0847e-02,
         3.6744e-02, -1.7523e-02,  6.4262e-02,  3.1320e-02, -4.7969e-02,
         2.2758e-02,  3.8882e-02,  6.2132e-02, -5.3100e-02, -3.9698e-03,
        -1.9782e-01,  2.7944e-01,  6.1143e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3784,  3.9968, -0.0157, -0.1139, -0.2193, -0.0938, -0.3027, -0.1428,
         0.3980,  0.0176, -0.0577, -0.1170, -0.1402, -0.1097,  0.0502, -0.0233,
         0.5853,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0938,  8.7406, -0.6085,  0.1457,  0.1792,  0.6718, -1.1523,  0.2749,
         0.7718,  1.0488,  0.0117, -0.6077, -0.3969, -0.8302,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4727e+00,  1.1192e+01,  3.3183e-01, -6.3441e-01, -1.5338e-01,
         7.0682e-02, -5.0796e-02, -1.3025e-01, -1.8401e-01,  6.1446e-01,
         9.6874e-02, -6.4044e-02, -9.7711e-02, -6.8640e-02, -9.4807e-02,
        -2.9533e-02,  8.0183e-02,  3.4044e-02,  1.8714e-01,  3.6997e-01,
         1.9895e-02,  4.3861e-02, -1.1271e-01,  5.2011e-02, -1.4894e-01,
        -8.4115e-02,  9.0153e-02, -1.3191e-01,  1.4597e-02, -1.1696e-01,
        -3.8429e-03, -6.1805e-02,  2.1946e-01,  2.6625e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8262e-01, -4.3433e+00,  1.1348e+00, -2.5289e-01, -7.3366e-02,
         4.1670e-02,  6.1802e-02, -1.7474e-01,  1.0116e-01,  5.2276e-02,
         4.1779e-03,  4.0082e-02, -3.4951e-02, -1.8340e-02,  1.2120e-03,
         9.4438e-02,  1.2397e-02,  6.4213e-02,  1.7788e-02, -8.3933e-02,
         8.7292e-02,  3.1426e-03, -1.1260e-02, -3.3366e-03, -3.6230e-02,
         1.5531e-02,  7.8309e-04, -2.0840e-02, -3.3387e-02,  7.0977e-02,
         7.2400e-03, -4.9515e-03,  2.5055e-01, -1.6849e-03,  2.3867e-02,
        -2.5433e-02,  5.4060e-02,  5.1919e-02,  3.1077e-03,  6.7270e-02,
         3.3580e-03, -2.2414e-01, -3.5856e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5917e-01,  7.2201e+00,  6.1369e-01,  4.2324e-02,  2.8584e-02,
        -4.5160e-02, -5.6200e-02, -1.6606e-01, -3.2487e-01, -2.0342e-01,
        -2.5379e-01,  5.7725e-03,  1.7061e-01, -1.0617e-01, -1.3292e-01,
        -7.9540e-02,  1.6071e-01, -3.9135e-02, -1.2462e-01,  4.0354e-02,
         1.3639e-01,  1.7362e-01, -1.1863e-01, -2.0623e-01, -3.3307e-01,
         8.6508e-02,  9.5989e-02,  3.8473e-02, -9.3405e-02,  4.8611e-02,
        -5.3613e-02,  3.0780e-02, -7.4436e-02,  6.3100e-02,  3.9827e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.7943, -6.7452, -0.3575,  0.6541, -0.0114,  0.5164,  0.1181,  0.5235,
         0.1436, -0.1411, -0.2582, -0.1938, -0.1343, -0.5915,  0.8005, -0.0524,
         0.0724, -0.1600, -0.1338, -0.0754, -0.3442,  0.0219, -0.4542,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4948e-03, -4.3126e+00, -6.1216e-01,  1.9900e-01,  2.9824e-01,
        -1.1669e-01,  2.2553e-01,  7.6099e-02,  5.6829e-03,  3.3767e-02,
         7.2003e-02,  2.2718e-02, -7.3318e-02, -1.3958e-02,  6.9481e-02,
         4.8309e-02,  3.0376e-02,  3.3985e-02,  6.2854e-01,  2.0106e-01,
        -9.8471e-02,  7.7053e-02,  3.3728e-02, -1.6852e-02, -1.2416e-01,
         9.5037e-03,  6.1099e-02, -3.1774e-02, -5.5085e-02,  1.2864e-01,
        -3.5179e-02, -3.8440e-02,  1.9441e-02, -5.8888e-02, -2.4126e-02,
        -1.2720e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0114,  9.4033,  2.5166,  0.1982,  0.3694, -0.6343, -0.0702,  0.4895,
        -0.1775,  0.0571,  0.2730, -0.0117,  0.3437, -0.3433, -0.7985,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4999, -5.9560,  0.0207, -0.0075,  0.0983,  0.2950,  0.3137, -0.0285,
         0.1474,  0.1009,  0.4378,  0.1780, -0.0213, -0.1560, -0.2840,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0991e-01, -4.3603e+00, -6.4845e-01,  1.3743e-01, -1.2978e-01,
        -4.9621e-02,  2.5518e-01,  8.2000e-02,  7.0529e-02,  1.2758e-02,
         1.5641e-01, -2.7972e-02,  3.0673e-01, -2.9665e-02, -1.5005e-01,
        -2.6861e-03, -8.3916e-02, -4.6902e-02, -2.5409e-02, -8.0223e-02,
        -4.3320e-02, -6.0187e-02, -1.0395e-01,  4.4236e-01, -2.3826e-01,
         3.5966e-02, -3.6015e-02,  9.7516e-02,  3.3900e-01, -1.0588e-01,
        -3.9203e-02, -1.7281e-01, -2.2491e-01, -1.3289e-01, -4.5818e-02,
        -1.3014e-01,  7.7212e-02, -2.6180e-02, -2.2746e-02,  1.7061e-02,
        -4.6223e-04, -1.5660e-01, -1.1630e-02, -2.6408e-02,  1.9015e-01,
        -4.8199e-02, -3.2171e-02, -1.4181e-02, -7.5989e-02,  6.8476e-02,
         2.1810e-01,  4.9416e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7934, -7.0692,  0.3035,  0.2111,  0.3646,  0.3898,  0.0942,  0.0697,
         0.0278,  0.1289, -0.0725, -0.0837, -0.0279,  0.1862,  0.1745, -0.0506,
         0.1637, -0.1368,  0.0362, -0.5448,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0252e-01,  3.3469e+00,  6.9450e-01,  9.6792e-02,  2.7285e-03,
        -7.1218e-02, -7.8076e-02,  6.0191e-02, -2.7011e-02, -7.3107e-02,
         5.6329e-02,  7.1215e-02,  8.3589e-02,  4.5778e-02, -1.4013e-03,
        -9.3957e-03, -2.0958e-02, -1.7209e-02, -1.3576e-01, -1.2889e-02,
        -6.8871e-02, -4.1631e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4192,  5.7304,  0.5115,  0.1944,  0.0254,  0.1236,  0.1441,  0.1699,
         0.2410,  0.1922, -0.1571, -0.0193, -0.1466,  0.0153, -0.1139,  0.0895,
        -0.1290, -0.7214,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5482e-02, -3.1150e+00, -6.9247e-02, -1.2855e-01,  3.7282e-02,
        -1.1598e-01,  4.9377e-02,  1.8543e-01,  1.4354e-02, -4.1411e-02,
         8.1593e-03, -7.2074e-02,  1.0594e-02,  2.1582e-04,  1.4816e-02,
        -4.6770e-02, -1.1434e-01, -7.6183e-03, -3.2161e-03,  3.4926e-03,
         1.7823e-02,  3.6419e-02, -9.5767e-02,  1.5935e-02, -1.0599e-01,
         4.4797e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4846e-02, -2.4562e+00, -1.6148e-01,  6.7144e-02, -9.6876e-03,
        -7.4294e-02,  1.7890e-04, -3.3805e-02, -9.1160e-02, -2.0949e-01,
        -4.4629e-02,  4.0011e-02, -1.4223e-01,  1.7751e-02, -1.1654e-01,
         4.8815e-02, -4.6008e-02, -1.1199e-01, -1.7699e-02, -8.3838e-02,
         3.7690e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9284e-02, -4.4722e+00,  4.8904e-01, -1.0862e-01,  7.6352e-02,
         1.7906e-01,  1.7884e-02,  2.3515e-01, -5.0530e-02, -2.0347e-01,
        -7.1705e-03, -1.8031e-01,  1.0652e-02,  1.1916e-01, -1.0057e-01,
         1.2233e-02,  1.0127e-01,  4.2987e-03, -7.2756e-02,  2.5656e-02,
         8.4862e-02, -5.1257e-02, -8.6018e-02, -3.6295e-02, -1.2546e-01,
         1.8591e-01,  2.7066e-02, -5.5469e-02, -2.4781e-01,  1.1524e-01,
        -2.6899e-01,  4.5567e-02,  3.4956e-02, -5.2151e-02,  5.0066e-02,
        -7.6287e-02,  1.7384e-02,  6.5908e-02,  1.9188e-01,  3.7075e-02,
        -4.8662e-02, -4.3470e-02,  6.7647e-03, -2.0082e-02, -1.7560e-02,
         4.8297e-01, -6.7577e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8773, -4.7252,  0.7895,  0.4897,  0.3135,  0.1932,  0.0751,  0.5704,
         0.5380, -0.0571,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.8247e-01, -3.4540e+00, -1.7936e-01, -2.1136e-01, -2.1372e-02,
         4.7563e-02,  1.7287e-02,  1.0468e-02,  2.0145e-02,  7.9134e-02,
        -3.5086e-02, -2.2475e-02,  1.0679e-02,  3.1506e-02, -2.9290e-02,
         3.7582e-02,  1.9839e-02,  5.7646e-03, -1.7629e-03,  2.4772e-02,
        -6.5590e-03,  1.7023e-01,  3.7705e-02,  3.7528e-02, -4.6642e-02,
        -3.1540e-03, -4.3760e-02, -2.2010e-02, -3.7422e-02,  1.4368e-03,
         4.4883e-02,  3.3295e-02, -4.9646e-02, -2.4860e-02, -2.0167e-02,
         2.6871e-02,  3.2663e-03,  1.0357e-02, -3.7812e-02, -4.4862e-02,
         2.7335e-02, -7.7116e-03,  6.3359e-03,  1.0305e-02, -8.7359e-02,
         1.0060e-01, -6.3925e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5534e-01, -7.1056e+00, -4.2626e-01,  1.0122e+00, -4.1702e-02,
         1.2289e-01,  3.9113e-01,  6.9548e-01,  1.2266e-01, -1.0687e-03,
         5.8031e-04,  4.0756e-01,  1.3703e-01,  2.1877e-01,  3.9375e-01,
        -9.7777e-02,  9.7822e-02, -4.0893e-02,  1.1253e-01,  1.4200e-02,
         1.8797e-01,  1.1013e-01, -3.6239e-02, -1.3937e-01, -1.2309e-01,
         1.2690e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1504e-02, -2.4685e+00, -2.9012e-01, -2.3289e-02, -4.0840e-02,
        -1.4523e-01, -4.4747e-02, -3.4434e-02,  5.5908e-02,  2.4115e-01,
         1.1079e-01, -1.8647e-02, -4.1542e-02, -7.2548e-02, -4.6034e-02,
         4.0447e-02, -5.1459e-02, -5.6077e-02, -1.9677e-04,  1.6222e-02,
         2.1111e-02,  2.2328e-01, -1.0845e-02, -7.7563e-04,  2.2943e-02,
         3.6879e-02, -3.9144e-02, -1.4276e-01,  1.0887e-02, -3.5790e-02,
         3.6890e-02, -6.0251e-02, -2.2508e-02, -6.3835e-05, -1.0666e-01,
        -2.0763e-01,  1.8475e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7093,  4.7019, -1.9749, -0.5330,  0.0999,  0.1073, -0.3982, -0.0261,
        -0.1739,  0.3961, -0.0811,  0.0124,  0.4226, -0.1571,  0.2399,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6745e-01, -6.4137e+00, -3.1377e-01, -1.7354e-01, -2.8911e-01,
        -4.7487e-01, -3.5986e-03, -1.4629e-01, -3.0058e-01, -9.2374e-02,
        -1.1741e-01, -2.9052e-01, -2.0326e-01,  3.4253e-01, -3.7283e-01,
         2.5809e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9748e-01, -6.3684e+00, -4.0619e-01,  2.7794e-01, -5.4512e-03,
         6.6843e-01,  9.9636e-02,  2.5828e-01,  2.2609e-01, -1.5945e-01,
         3.3839e-01,  1.0674e-01, -1.2484e-01, -9.4678e-03,  8.9846e-02,
        -1.4520e-01,  1.9760e-01, -1.1921e-02,  7.0130e-02,  5.8002e-03,
         2.6637e-01,  3.1104e-02,  1.1462e-01,  2.4088e-02,  1.9012e-02,
         1.7398e-01, -5.5042e-02, -2.6903e-02, -5.1810e-01, -1.8259e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6155, -5.9268, -1.1146,  0.2580,  0.0110, -0.0162, -0.1167, -0.0912,
        -0.0630,  0.1989,  0.1360,  0.1205,  0.0407, -0.0652,  0.1004, -0.0089,
         0.0917, -0.1034, -0.0137,  0.0449,  0.0246, -0.0190, -0.0591,  0.1310,
        -0.0514,  0.0541, -0.1693, -0.0075, -0.0208, -0.0236,  0.2051, -0.0959,
         0.1778,  0.0501,  0.1513, -0.1121,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3129e-02, -4.3636e+00, -1.6965e-02,  4.6487e-02,  1.2192e-01,
         7.6648e-02,  4.4592e-02,  1.2810e-01,  3.6642e-01, -1.6058e-01,
         1.3330e-02, -7.6511e-03,  1.3447e-01,  8.5103e-03,  8.3203e-03,
        -1.4479e-02,  5.0286e-02, -4.4347e-02,  4.6599e-02,  3.6882e-02,
        -7.1243e-02, -4.5735e-02, -2.9858e-02, -1.8587e-01, -1.6050e-02,
        -3.7713e-02,  9.6658e-02, -6.7332e-03, -8.5451e-02,  3.1383e-03,
         5.0598e-02, -1.0940e-01, -2.3858e-02,  2.0107e-01, -3.4913e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4358, -3.9340,  0.2650, -2.1184, -0.6460, -0.3186,  0.2967,  0.4859,
        -0.4798,  0.3090, -0.8255,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5909e-01, -3.0352e+00,  4.7726e-02,  1.3802e-01, -2.7698e-02,
        -3.3714e-02,  9.2278e-02, -4.4905e-03,  1.8083e-03, -2.0016e-05,
         7.9168e-02,  5.8079e-03, -1.8833e-01,  2.4469e-02,  2.8567e-02,
         1.3650e-01,  1.0886e-01, -1.7230e-02,  4.7684e-02,  5.5662e-02,
        -7.2407e-02,  1.1789e-01, -6.2395e-02, -1.9017e-01,  1.1742e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2085,  5.5658,  0.6856, -0.0391,  0.1828, -0.1185, -0.0072, -0.0330,
        -0.0293, -0.1329,  0.0314, -0.0186,  0.0115,  0.0726,  0.2113,  0.1660,
        -0.2618,  0.0908,  0.2207,  0.2332,  0.3153,  0.3307,  0.1021, -0.2128,
        -0.2847,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2857,  3.7519,  0.1344,  0.0625,  0.1247,  0.0413, -0.0191,  0.0106,
         0.0517,  0.2336, -0.0894, -0.0923, -0.0261,  0.0558,  0.0101,  0.0141,
        -0.1283, -0.0305, -0.0485, -0.1158,  0.0993, -0.0729, -0.1006,  0.0506,
        -0.0134, -0.1167,  0.0729,  0.1680, -0.0374, -0.2217, -0.1646, -0.0901,
         0.2004, -0.0098,  0.0144, -0.0223, -0.0041,  0.0208,  0.0941,  0.0194,
        -0.0152, -0.0143, -0.0193, -0.0251, -0.0339, -0.0821, -0.0355,  0.0339,
         0.0265, -0.0425,  0.1300, -0.0318, -0.0102,  0.3723], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.4381, -7.5162, -0.7859, -0.1770,  0.3061, -0.0286,  0.4592,  0.0240,
        -0.2320,  0.2051, -0.0211,  0.1899, -0.3196, -0.6041,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9218e-01, -5.4196e+00, -1.7981e-01,  1.3173e-01, -2.0381e-01,
        -2.5541e-01, -2.2262e-01, -2.3210e-01, -8.0908e-02, -3.4972e-03,
        -1.6513e-01,  9.8878e-02,  7.5706e-02, -4.1959e-02,  6.9869e-03,
         3.3901e-01, -2.2613e-02, -9.7440e-02,  1.0581e-01, -2.1025e-02,
         5.7817e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3315, -5.7752,  0.0836,  0.2880, -0.1981,  0.0725,  0.7094, -0.4041,
         0.4913,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0093, -8.4871, -0.0900,  0.4681, -0.2267,  0.2411, -0.0486,  0.4801,
         0.1297, -0.2042,  0.2512, -0.1372,  0.1620,  0.2978,  0.0351,  0.0323,
         0.1395, -0.0488,  0.0197, -0.0352,  0.0809,  0.2016,  0.0787,  0.3017,
         0.2895, -0.0168,  0.0892,  0.3475,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2767e-01, -5.9286e+00, -3.5919e-01,  1.2818e-02,  1.5121e-01,
         1.2638e-01,  7.3456e-02,  1.7013e-01,  1.1357e-01,  1.8150e-01,
         5.7622e-02, -1.1234e-01,  1.2482e-03,  5.5687e-02,  3.5581e-02,
        -1.9356e-01, -2.0079e-01, -1.1252e-01,  9.0273e-02,  3.7474e-01,
        -2.7732e-01,  1.7495e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7093e+00,  1.0497e+01, -7.7073e-01,  2.2463e-01, -1.0143e-01,
         2.0077e-01,  2.7759e-03,  6.2096e-02,  2.1027e-02,  2.1158e-01,
         7.6232e-02,  1.5534e-01, -3.5275e-01,  1.1417e-01, -2.2596e-01,
        -2.3872e-01,  1.0706e-01,  3.6291e-02, -8.6181e-02,  4.8355e-02,
        -7.7846e-02,  1.1777e-01, -5.8732e-02,  6.3180e-03,  2.5921e-02,
         1.3001e-01,  1.2729e-02,  4.1589e-02,  1.5477e-01, -1.5735e-01,
         7.5153e-02,  7.3446e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0812e-01,  4.9711e+00,  5.3062e-01,  3.9584e-02,  2.0984e-01,
         1.4065e-01, -5.1442e-03, -6.4546e-02,  3.8711e-03, -2.1449e-01,
         9.8408e-02, -8.5675e-02, -1.5106e-02,  2.1146e-02, -6.7571e-02,
         6.2744e-02, -3.7970e-02,  1.5568e-02,  2.5682e-02, -5.2052e-02,
        -6.8602e-02,  9.1132e-02, -3.0081e-02, -1.9562e-02, -5.4641e-02,
         1.4697e-02,  1.8004e-01,  5.1959e-02, -4.4740e-02,  4.1770e-02,
         1.6192e-01, -7.8808e-02, -1.0532e-01,  2.3371e-02, -2.7978e-02,
         4.9954e-02, -2.6367e-01, -4.2630e-02,  2.5207e-02,  3.5371e-02,
        -1.9785e-01,  6.9949e-03,  6.0404e-01, -2.2254e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3942, -6.5605, -0.5104,  0.1514, -0.3689, -0.3400,  0.1144, -0.3570,
        -0.0433,  0.4935, -0.2612,  0.1732, -0.1326,  0.1728, -0.0238, -0.4381,
        -0.3304,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7250e+00,  1.0005e+01, -1.9319e-01,  6.0187e-01,  4.6657e-01,
        -1.2519e-01, -9.4192e-03,  1.2370e-01, -2.3322e-01,  3.7854e-01,
        -1.6710e-01,  1.9042e-04, -1.4028e-01,  1.7012e-01, -1.8033e-01,
        -1.1217e-02,  7.6994e-02, -1.3101e-02,  1.6196e-01,  2.2185e-01,
        -1.1011e-01,  3.0840e-01,  1.1798e-01, -4.6531e-02, -1.8458e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4747, -6.4812, -0.6007, -0.1435, -0.0236,  0.4062, -0.3026,  0.0830,
        -0.0965, -0.2566, -0.0978, -0.0660, -0.0503, -0.0694, -0.0726, -0.0869,
        -0.2423, -0.0941, -0.1801, -0.3975,  0.4579,  0.0948,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3197, -4.5426,  0.5143, -0.3383,  0.5123,  0.1313, -0.0997,  0.1333,
        -0.3379,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8079, -6.1705,  0.9226, -0.3933, -0.3851, -0.2333,  0.1686, -0.3058,
        -0.1175, -0.1921, -0.2660,  0.3105,  0.2100,  0.1648, -0.1881,  0.2325,
         0.0830, -0.1916,  0.0424,  0.0976, -0.0349, -0.1673,  0.2819,  0.0874,
         0.0897,  0.1916,  0.2136,  0.0791, -0.0699,  0.0662, -0.2130,  0.1624,
         0.0507, -0.2934,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.4264e-01,  4.8789e+00,  2.3891e-01,  8.2887e-01,  1.2456e-01,
         1.6089e-01, -6.2685e-01,  2.0468e-01, -1.9501e-01, -1.8481e-02,
         1.3898e-02,  1.4559e-01,  6.0741e-02,  1.3387e-01,  1.8212e-01,
        -8.2628e-02,  2.7478e-02,  1.2753e-01,  9.5804e-02,  5.3693e-02,
        -6.2681e-02, -5.4463e-02,  3.0908e-01, -1.1954e-01,  9.7118e-02,
         3.9593e-02,  6.0538e-02,  3.4708e-02, -1.7596e-02,  7.2393e-02,
        -8.9600e-02, -5.3938e-02,  7.8645e-03, -9.7452e-02, -1.8708e-01,
         1.9136e-02, -4.1809e-03, -1.7409e-01, -8.8470e-02,  5.0003e-02,
        -1.9773e-02, -8.6438e-03,  2.2972e-01, -2.1393e-01,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4509e+00, -8.7786e+00,  6.2811e-01,  4.6277e-01,  6.4783e-02,
        -1.0191e-01, -1.0755e-01, -1.3026e-01, -6.3524e-02,  1.9255e-01,
        -6.4384e-02, -8.0329e-02,  1.5475e-02,  3.8927e-02, -2.0580e-01,
         2.0794e-01,  1.9342e-02,  1.7984e-01,  2.5693e-02,  1.7169e-01,
         3.8459e-01, -1.4220e-03, -7.0212e-01, -5.3702e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6343,  8.9902,  3.4775,  1.7399, -0.1489,  0.3775, -0.1684,  0.0465,
        -0.0535, -0.2969, -0.2773,  0.5742, -1.9678,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4392e-01, -3.7849e+00,  2.3609e-02,  3.9696e-02, -1.1505e-01,
         3.5015e-01, -4.0469e-03,  1.1326e-01, -2.9931e-03,  5.4783e-02,
        -5.8990e-02, -1.7529e-03, -1.2032e-02, -1.8648e-01,  7.1188e-02,
        -1.0600e-01,  2.6122e-01, -3.7660e-03,  5.6216e-03,  2.1773e-02,
         5.7016e-01, -6.0314e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4211, -4.7906,  0.5039,  0.1193,  0.0269, -0.1126,  0.4594, -0.2606,
        -0.1016,  0.0867,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2606e-02, -4.7796e+00,  8.4949e-01, -3.2041e-01, -2.9399e-02,
         2.7663e-02, -7.8602e-02, -5.7352e-02, -8.5351e-02, -2.1504e-01,
         2.8271e-01,  5.9028e-02,  3.4576e-01, -1.9205e-02,  1.0361e-01,
        -6.5438e-02,  5.3909e-02, -1.5670e-02,  4.1375e-03,  9.5148e-02,
         1.5655e-01, -4.7270e-01, -4.9999e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5100e-01, -3.6585e+00, -3.1324e-01,  8.5478e-02,  5.9530e-02,
        -3.8643e-03,  2.3835e-02, -2.1279e-02, -1.1418e-02,  2.1291e-02,
         7.2072e-02, -6.1226e-02,  5.6063e-02, -5.0991e-02,  1.0768e-01,
         3.1358e-02,  8.7447e-02, -1.7654e-02,  8.7873e-02,  1.2164e-02,
        -3.5302e-02,  7.2041e-03, -6.6230e-02, -4.6227e-03,  9.8743e-04,
         1.4628e-02,  4.6546e-02,  2.5507e-03, -1.4941e-02,  2.3761e-02,
        -9.4789e-02, -3.8563e-02,  2.2740e-02,  6.8478e-02,  2.1095e-02,
        -6.7862e-02, -4.1751e-02,  2.5282e-02, -1.0301e-01, -4.2532e-02,
        -3.4905e-02, -3.5576e-02, -2.0910e-02,  9.4571e-02, -2.5055e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3665, -8.9092,  0.2993,  0.7537, -0.0157, -0.0791, -0.2335,  0.3554,
        -0.1983,  0.0173, -0.0513, -0.0289, -0.0185,  0.4337,  0.1837,  0.2526,
        -0.1053, -0.4498,  0.8708,  0.8105,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6578e-01,  3.7236e+00, -1.4629e-01, -1.7813e-01,  1.3695e-01,
         4.8577e-02, -6.5920e-03, -2.3907e-02, -7.8171e-02, -1.7903e-03,
        -2.4555e-02,  6.6264e-02, -3.0392e-01,  6.8823e-02, -8.1680e-02,
        -1.6599e-03, -9.1859e-03,  2.5981e-02, -7.6390e-02,  2.4867e-02,
        -1.3332e-01,  7.6543e-02,  3.0962e-01,  2.8744e-02,  1.2790e-01,
        -5.7969e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8431e+00,  1.4739e+01,  1.1183e-01,  3.5553e-01,  1.7512e-02,
        -7.2951e-02,  1.0168e-02,  2.0123e-01,  2.6351e-01, -1.1687e-02,
         9.9161e-02,  4.2538e-01,  2.1801e-01,  2.7031e-01,  5.1330e-02,
         5.5750e-02,  5.4603e-02,  1.6601e-01,  1.8714e-02, -5.7930e-03,
         9.3519e-02,  1.2384e-01,  1.1979e-01,  2.1311e-01,  4.3128e-02,
         5.7720e-02,  2.1436e-02, -4.4663e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6729, -4.2869, -0.3220, -0.1005,  0.2523,  0.1386, -0.2903, -0.4544,
         0.4853,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3514, -2.8301, -0.0837,  0.0748, -0.2456,  0.0266,  0.0393, -0.0636,
        -0.0712, -0.0413, -0.1507, -0.0785, -0.0594,  0.0403, -0.1294,  0.0241,
         0.1163, -0.2816, -0.1130, -0.1698, -0.0243,  0.1259, -0.0264,  0.0235,
        -0.0415,  0.2986,  0.1339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.2504, -8.5443, -0.7547, -0.2440, -0.0171,  0.1138, -0.1470, -0.1302,
        -0.0614,  0.1029,  0.1415, -0.3205, -0.2224, -0.0159, -0.0897,  0.0321,
         0.0317, -0.0332,  0.0244,  0.1329, -0.3226,  0.1375, -0.2838, -0.1191,
        -0.7999,  0.3232,  0.0173,  0.0469,  0.0613,  0.2193, -0.0396, -0.0765,
         0.0839, -0.1557, -0.0371,  0.0112, -0.1944,  0.2139,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0412e-01,  6.9999e+00,  9.2560e-01,  1.1516e-01, -1.3001e-01,
        -6.3227e-02, -1.5503e-01, -7.3867e-02, -2.4820e-01, -9.6596e-02,
         6.7415e-02, -1.4656e-01, -9.9754e-03,  4.2201e-02,  5.0384e-02,
        -5.0100e-02, -2.0979e-01,  5.1772e-03,  5.8564e-02,  3.2507e-02,
         5.4550e-02,  6.3065e-03,  6.5980e-03,  7.7396e-02, -3.0920e-02,
         5.5904e-02,  4.0257e-02, -6.5642e-02,  2.4892e-02,  1.7558e-01,
         1.5818e-01,  2.7483e-01,  2.7938e-01, -1.5408e-01,  1.0412e-02,
         2.3091e-01,  3.3065e-01,  6.6501e-01,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0796e-01,  8.0821e+00,  5.7854e-01,  1.1346e-01, -8.9666e-02,
         4.5344e-01, -6.3358e-02,  4.6888e-01,  2.8023e-02,  6.0429e-01,
         3.8168e-02, -9.8284e-04, -2.5902e-01, -1.9007e-01, -9.1302e-02,
         1.3200e-01,  1.1598e-01, -1.8824e-01,  3.7756e-01,  1.2941e-01,
         2.4147e-01,  1.6871e-01, -2.2161e-01, -9.9461e-02,  1.1381e-01,
         1.7869e-01, -1.3208e-01,  7.3498e-01, -9.3240e-02, -9.9058e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3350e-02, -5.0244e+00,  6.7270e-01,  1.1185e-02,  4.4444e-02,
        -9.0132e-02,  1.5187e-01, -5.4892e-02, -8.9901e-02, -2.0551e-01,
        -4.3279e-02, -4.3788e-02, -3.2627e-02, -5.7881e-02, -3.8847e-02,
        -9.8038e-03, -2.2527e-01,  2.8243e-03, -1.5034e-01, -3.2561e-02,
        -1.8229e-02,  1.5882e-02, -3.4809e-02, -7.1996e-02, -1.8582e-01,
         1.7813e-02,  4.0000e-02, -9.4361e-02, -1.4384e-02, -1.3631e-02,
        -5.6098e-02, -1.0553e-02,  5.8448e-02, -1.2774e-01,  5.1264e-02,
         2.1759e-03,  2.4465e-02,  5.0074e-02, -5.4038e-03,  8.9933e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6289, -6.9934,  0.7529,  0.1652, -0.1617, -0.0664, -0.0972, -0.1648,
        -0.2355, -0.0965,  0.1695, -0.0087,  0.0837, -0.0793, -0.0478, -0.0096,
        -0.1355,  0.2407, -0.1184, -0.0263, -0.0074, -0.0654,  0.3226, -0.1583,
        -0.0307, -0.2015, -0.1373, -0.0390, -0.0435,  0.3285,  0.4345, -0.0284,
         0.0209,  0.0535, -0.0656, -0.0649, -0.5881, -0.9344,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7902e-02, -3.7830e+00,  3.2950e-01,  1.7065e-01, -5.7178e-02,
         8.1735e-03,  8.6521e-02,  3.5413e-02,  1.2643e-01,  6.2667e-02,
        -2.8717e-02,  9.5410e-02, -9.3482e-03,  1.1325e-01, -1.0665e-01,
        -4.9196e-02, -4.6090e-02,  4.6773e-03, -9.0815e-02, -7.4183e-02,
         3.7289e-04,  2.6177e-01, -2.1952e-01, -1.0355e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5687e-01, -2.9150e+00,  4.2129e-01, -3.9770e-02, -7.3036e-02,
         1.0537e-01, -5.0504e-02,  3.7570e-02,  1.7219e-02, -3.7477e-02,
        -1.4408e-01, -2.5556e-02, -3.1908e-03,  3.2330e-01, -1.9637e-01,
         5.1167e-02,  1.9078e-02, -1.7651e-02,  2.3593e-02, -4.8387e-02,
        -5.1034e-02, -3.2721e-03,  1.9427e-01, -6.0527e-04, -5.0288e-02,
        -1.5958e-04,  7.6224e-02,  7.9201e-03,  5.7797e-02, -3.9823e-02,
        -2.6862e-01, -2.7559e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5173e+00,  7.4399e+00,  2.7295e-02, -2.9468e-01,  1.2623e-01,
        -5.1994e-01, -2.2400e-01, -2.6650e-01, -2.2888e-01, -2.3858e-01,
         1.2168e-01, -7.6511e-02, -3.8878e-01,  6.8885e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6723,  9.1088,  0.6088, -0.2051, -1.1042, -0.1364, -0.1198,  0.1867,
        -0.2198, -0.3342, -0.1405,  0.0496, -0.1205, -0.0249, -0.1544, -0.2051,
        -0.1002,  0.1080, -0.0518,  0.0647,  0.2285, -0.1152, -0.2639,  0.7728,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3302e-02, -3.2051e+00,  5.2609e-02,  6.3500e-02,  9.3638e-03,
         1.0883e-01,  3.4729e-02,  2.1012e-02, -9.6113e-02, -2.6920e-02,
         2.9026e-02,  1.0332e-01,  3.1031e-02, -2.9483e-02, -1.8767e-02,
         1.4267e-03,  5.1785e-02, -1.5684e-01, -1.4093e-01, -4.4828e-02,
        -1.9797e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2463e-01,  7.7651e+00,  3.3505e-01, -2.3390e-01,  6.4592e-01,
        -1.9144e-01, -1.1852e-01,  1.0800e-02, -3.6264e-01,  3.5701e-01,
        -3.7150e-01, -4.6075e-01,  1.2058e-03,  4.2701e-02, -3.3645e-01,
        -6.1257e-02,  6.7327e-02,  7.1758e-02,  3.3051e-02,  1.9289e-01,
         3.2196e-02,  2.1105e-01,  9.7544e-03, -6.3680e-02, -1.3606e-01,
         4.2645e-01, -2.2128e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0535e-01,  9.6835e+00,  8.4920e-01,  3.1589e-01,  2.8177e-01,
         1.2203e-01, -4.2563e-02,  1.1610e-01, -7.0925e-01,  6.4591e-02,
        -2.3599e-01,  2.8519e-01, -8.9941e-03,  1.8725e-01, -6.7187e-03,
        -1.1380e-01,  8.0622e-03,  1.6295e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.5644e-02,  1.3601e+01,  1.4886e-02,  1.3519e+00, -3.6764e-01,
         1.9448e-01,  2.1296e-01,  6.9825e-02, -4.6376e-02,  6.4676e-02,
         7.7810e-02,  1.2651e-01, -1.0654e-01, -2.6077e-01, -1.1121e-01,
        -5.7799e-02,  1.0683e-02,  1.6130e-01, -4.9749e-02, -9.7932e-02,
        -1.1314e-01,  3.2128e-01,  1.1930e-02,  6.2402e-02,  1.6272e-01,
        -2.7389e-01,  9.7896e-02, -1.2712e-01,  3.6360e-02,  1.5700e-03,
        -6.3734e-02, -1.7039e-01, -1.3793e-01, -1.3518e-02,  3.4615e-04,
        -1.1733e-01, -1.2694e-01, -3.0988e-02,  7.1733e-02, -1.2271e-01,
        -9.2083e-02, -1.1445e-01, -2.0928e-01,  1.2416e-01,  5.5174e-01,
         9.7117e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5282e+00,  5.9297e+00,  3.5153e-01, -3.3016e-01, -1.7635e-01,
         4.3685e-02,  8.9020e-02,  8.2702e-02,  5.9313e-02,  4.5110e-02,
         5.2064e-02, -3.7204e-03, -3.1742e-02,  1.1076e-01, -8.2188e-02,
         7.9402e-02, -6.6525e-03, -1.4190e-01,  2.3052e-01, -2.9157e-01,
        -1.0164e-01,  1.0744e-01,  1.7712e-01, -8.7061e-02, -4.8737e-01,
         2.9810e-02,  1.5023e-01,  5.2886e-02, -5.0132e-02, -1.0607e-01,
         9.1072e-02, -2.0352e-01, -8.5099e-02, -1.8380e-02,  3.4047e-02,
        -8.4820e-02, -2.4688e-01, -1.3088e-01,  2.8763e-03, -1.4157e-02,
         1.1085e-02,  5.5037e-02, -7.6853e-03,  8.2058e-02,  1.4452e-01,
        -1.5363e-01,  8.1824e-03,  1.5808e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0436e-01, -6.8177e+00,  5.5135e-01,  4.9954e-01, -1.9754e-01,
         7.3883e-02,  5.4529e-02, -5.4477e-02,  1.1128e-03,  4.8832e-02,
        -1.7763e-01, -1.9101e-01,  6.0289e-02, -1.9163e-02, -8.2883e-02,
        -2.0925e-02,  2.1612e-01,  9.1129e-02, -3.2202e-02, -1.5364e-01,
         4.2428e-01, -3.4397e-01,  8.4945e-02, -1.0387e-01, -1.4917e-02,
        -2.9180e-02,  6.5986e-02, -7.5611e-02,  5.1762e-02, -1.8799e-01,
         4.1172e-02, -1.8808e-01, -2.5567e-01, -3.9062e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0977,  5.9170, -0.6737, -1.0782,  1.0908,  0.6733,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6079e-04,  4.6815e+00,  3.1281e-01, -1.0100e-01,  1.7707e-01,
         1.1780e-01,  1.0953e-01, -4.1627e-02,  3.9745e-02, -6.6225e-02,
         2.8531e-02,  1.2001e-02, -4.9204e-03, -8.5589e-02, -1.6819e-02,
        -2.2545e-02,  4.5102e-02, -1.4999e-01, -1.0476e-01, -5.8248e-02,
        -1.3646e-01, -9.0727e-03,  8.9445e-02,  1.3049e-01,  8.4821e-02,
        -1.3922e-02, -2.6342e-02,  4.6535e-02, -3.5492e-02, -7.9707e-02,
         4.8347e-02,  3.1075e-02, -1.4924e-02, -7.2425e-02, -1.4177e-03,
         4.0378e-02,  3.3659e-02, -7.8204e-03,  2.9521e-02,  1.8123e-02,
         3.2299e-02,  3.1626e-02, -1.1820e-02,  2.5129e-02, -3.0505e-01,
        -1.0598e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1878, -1.3308,  0.0074,  0.1607,  0.0351,  0.0290,  0.0554,  0.1223,
        -0.0508,  0.0050, -0.0118,  0.0378, -0.0128, -0.0368, -0.0901, -0.0524,
        -0.1267,  0.0645, -0.0583, -0.1045,  0.2104, -0.0491,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2164, -4.3915,  0.1025,  0.3908,  0.0829, -0.0257,  0.4176,  0.6044,
         0.2598,  0.4028,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2599e+00, -5.9024e+00, -5.7432e-01,  8.0942e-02, -8.2310e-02,
         7.5044e-02, -1.0115e-01, -2.2292e-01, -9.7722e-02,  8.0285e-02,
         1.5973e-02, -7.7672e-02, -5.3537e-01, -6.1228e-02,  3.0279e-03,
        -1.1086e-01,  3.6484e-01, -1.3697e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0207, -2.9794, -0.1642,  0.0893,  0.2950, -0.1792,  0.0967,  0.2167,
        -0.1270,  0.1316,  0.0968,  0.0408,  0.0472, -0.2030,  0.0400,  0.1979,
        -0.0612,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2404e+00,  7.1499e+00,  1.0515e+00, -5.7131e-01,  4.7250e-01,
        -3.5370e-01, -2.5024e-01,  7.6431e-02, -4.5026e-03, -7.2497e-01,
         1.2082e-01,  7.6362e-02,  8.7166e-03, -5.3626e-01, -7.0135e-01,
        -2.9717e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1153e+00,  6.5396e+00, -1.3742e-01,  4.3434e-01, -1.2834e-01,
         1.4395e-03,  3.4839e-01, -7.3580e-02,  1.3460e-01,  2.6222e-01,
        -2.5314e-01,  1.1881e-01, -2.9271e-02, -1.4318e-01, -1.5504e-01,
         3.8177e-02,  2.4775e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6490, -9.0667,  0.1825,  0.3487,  0.6102, -0.0224,  0.2775, -0.2796,
        -0.3283, -0.2108,  0.0651, -0.1561, -0.0553, -0.7923, -1.1233,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.1911, -6.1298, -0.4263,  0.1342,  0.0521, -0.3629, -0.2183,  0.4188,
        -0.1552,  0.0903, -0.0154, -0.4258, -0.1155,  0.6285,  0.7280,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2841, -4.0583, -0.3927,  0.1992,  0.0901,  0.2817,  0.2013, -0.1035,
         0.1325, -0.0360,  0.0846,  0.2395,  0.0973,  0.4866,  0.0428,  0.0246,
        -0.1299,  0.5348, -0.4366,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5259, -9.6107,  0.0350,  0.5254,  0.1360,  0.6156,  0.4278,  0.4973,
         0.4249, -0.4060,  0.2060,  0.5076,  0.1194,  0.1720, -0.5432, -0.0425,
        -0.8569,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6185e-01, -2.2644e+00, -6.9499e-02, -2.4183e-01,  2.5177e-02,
        -2.7578e-02,  8.7637e-02, -1.7335e-01,  1.0369e-03, -3.9266e-02,
         1.2558e-01,  5.8876e-02,  1.5912e-02,  8.6762e-03, -9.1932e-02,
        -4.6077e-02, -1.6429e-03,  2.0532e-02, -2.4169e-03,  1.6307e-02,
        -8.1345e-03, -7.2556e-02,  1.5221e-02, -9.6886e-03, -2.3788e-02,
        -3.7859e-02, -6.7365e-02,  3.0286e-02, -2.9516e-02, -2.9882e-03,
        -5.2816e-03,  3.7070e-03, -2.2644e-02,  1.3306e-02, -1.2567e-01,
         4.2191e-03, -5.5734e-02, -4.7646e-02, -5.0154e-02, -5.1798e-02,
         2.5255e-02, -4.7020e-02, -5.1264e-02, -7.8088e-02, -5.2292e-02,
        -1.9784e-02,  7.0748e-02, -3.3745e-02, -2.4149e-01,  1.3083e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5420, -5.3365, -0.5490,  0.3705,  0.0578,  0.0496,  0.1560,  0.2510,
        -0.1183, -0.1579, -0.4457, -0.1255,  0.1141,  0.1578,  0.1160, -0.7616,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3083, -8.2585, -0.4316, -0.0549, -0.0402, -0.3478, -0.0599,  0.0881,
        -0.1461, -0.0747,  0.2294, -0.0229, -0.1074, -0.0745, -0.1200, -0.1080,
        -0.1527, -0.0612,  0.2072,  0.1100,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9111e-01,  7.1770e+00, -3.9938e-01,  2.3438e-01, -4.7766e-02,
         1.0621e-01, -2.8495e-01,  2.9491e-01,  5.5215e-02, -4.0456e-03,
        -3.6302e-01,  1.6940e-01, -1.2807e-01, -3.5583e-03, -2.3655e-02,
         1.1195e-01,  3.4963e-01, -9.6192e-02,  3.3777e-01, -9.2201e-02,
         1.5405e-01,  1.8177e-02, -7.0012e-02, -2.7920e-02, -1.4232e-01,
        -2.0065e-01,  1.3425e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3388, -4.6085, -0.0848,  0.0262,  0.1371, -0.1290,  0.0439,  0.3539,
         0.2541, -0.0876, -0.0397,  0.0406, -0.2214,  0.2126,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9481, -3.1462,  0.0032, -0.1990,  0.0664,  0.0777,  0.0746, -0.0467,
        -0.0206,  0.0732, -0.1131, -0.0701,  0.5471, -0.3568,  0.7090,  0.3407,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1500,  3.2544, -0.0134,  0.0165, -0.0681, -0.0318,  0.1523, -0.1547,
         0.0415,  0.0604,  0.0059,  0.0827, -0.1620,  0.0066, -0.2441,  0.0060,
        -0.0170, -0.0278, -0.0707, -0.0399, -0.0112,  0.0103,  0.0496, -0.0886,
        -0.1748,  0.1800, -0.2973,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5778e-01, -5.9031e+00, -4.4547e-01,  4.3731e-01,  7.7859e-02,
         3.1370e-01,  2.6881e-03, -6.6124e-02,  2.2515e-01,  2.9061e-03,
        -7.0703e-02,  1.5463e-03,  1.0539e-01, -1.2220e-01,  1.9803e-01,
        -4.8460e-02,  1.1814e-02, -2.4455e-01,  8.3119e-02, -2.3469e-02,
         1.7157e-01,  7.9617e-02,  3.4884e-05, -5.1469e-02,  2.9861e-02,
         9.9964e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5276e-01, -4.4615e+00,  1.6508e-01,  2.5364e-01, -3.0469e-01,
        -8.4931e-02, -5.2399e-02,  5.3040e-03, -5.0403e-02, -3.8985e-02,
         8.6742e-02,  9.1561e-02, -1.0575e-01,  3.9380e-02,  1.5783e-02,
        -1.7588e-02,  1.0696e-01,  4.6040e-02,  5.3393e-02,  8.3059e-02,
         1.1923e-01,  2.8062e-02, -6.4571e-02,  3.7685e-05, -3.1778e-02,
         8.7181e-02, -2.2670e-01, -1.4423e-02,  1.3827e-01, -1.1684e-01,
         4.5743e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.7670e-01, -5.7010e+00,  5.0639e-02, -3.1085e-01, -6.2612e-03,
        -2.8825e-02,  4.3881e-02, -1.6463e-01,  2.4851e-01,  1.3289e-03,
        -5.4910e-02,  1.2841e-01, -6.6963e-02,  5.1782e-02,  3.5360e-03,
        -1.1631e-01, -1.3912e-01,  4.3389e-01, -3.7707e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2498, -7.4692, -0.3087,  0.2104,  0.1867,  0.0910,  0.6629, -0.3320,
        -0.1189,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0553, -8.2015,  0.0733, -0.3460, -0.0144, -0.2433,  0.0920,  0.0550,
         0.2526,  0.3462,  0.1031,  0.3946,  0.0905, -0.1063,  0.0913,  0.0189,
        -0.2842, -0.0839,  0.0146, -0.0508,  0.1368,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8998e-02, -8.3165e+00, -4.3365e-04,  5.2028e-02,  9.7941e-03,
         5.8793e-02,  2.9645e-01,  8.0428e-02,  1.2537e-01,  1.3807e-01,
        -1.3607e-01,  1.8381e-01, -8.2301e-02,  1.0177e-01, -1.7861e-01,
        -1.2977e-01,  1.4768e-01,  1.6103e-01,  3.8101e-02,  1.2070e-01,
        -6.6753e-03,  4.9065e-02, -8.2863e-02,  2.8353e-01, -2.0610e-02,
        -2.3890e-01,  5.5997e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4548e-03,  6.6907e+00,  5.2886e-01,  1.2123e-02, -2.3537e-02,
         1.7494e-01, -1.7481e-01,  1.7639e-02, -1.0159e-01, -1.5023e-01,
         1.1995e-01, -2.3099e-01, -2.4449e-01, -3.7743e-02,  1.8197e-01,
        -3.7648e-02,  1.2648e-02, -1.3694e-01, -5.2947e-02,  3.4061e-02,
         4.7804e-03, -1.1859e-01,  1.1996e-01,  1.0058e-02,  4.1035e-02,
         1.0294e-01, -1.1441e-01,  4.0882e-02, -1.2260e-01,  1.2173e-01,
         1.7341e-02, -2.1203e-02, -7.8501e-02, -3.4083e-02, -2.9516e-02,
        -4.0861e-02,  3.6832e-02,  2.7264e-01,  2.5530e-01,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1331e-02, -6.8907e+00,  3.0099e-01,  2.2068e-01, -4.4252e-02,
        -3.3311e-01, -1.3900e-01, -2.2701e-01, -1.6076e-01, -4.0671e-01,
         2.3108e-01, -1.2432e-01,  1.7513e-01, -3.4445e-01,  3.5854e-03,
        -3.1593e-03, -6.6668e-02, -1.3538e-02,  9.3099e-02,  3.1455e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2888e-01,  5.4369e+00, -3.5137e-01,  1.0706e-01, -5.6040e-02,
         2.4784e-01, -1.4845e-01,  1.5679e-02,  3.4862e-03, -7.1502e-02,
         1.1280e-01, -4.7636e-02,  6.5782e-02, -7.2198e-03, -1.9114e-01,
         5.0217e-02, -1.7792e-01, -6.4575e-02,  1.1052e-02, -4.2844e-02,
         1.1123e-02, -1.6656e-01, -3.3178e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1739e-01,  1.1667e+01,  1.6942e-01, -1.0113e-01,  3.2832e-01,
        -8.8138e-02,  9.8829e-02,  6.9165e-02, -1.0824e-01, -8.7090e-03,
         6.2164e-02,  7.5639e-02,  7.3087e-02,  1.4839e-01, -6.3880e-02,
         2.3974e-01, -2.3679e-01,  1.1210e-02, -2.4836e-01,  4.0419e-02,
        -2.5067e-02,  4.2435e-02, -2.9621e-02,  4.8800e-01,  3.3473e-01,
        -3.8265e-01, -1.2946e-01, -6.4939e-02,  5.3213e-02,  8.3612e-02,
        -1.1184e-01,  9.1682e-03,  1.5716e-01, -2.7623e-02,  1.8244e-02,
         1.0498e-01,  4.0949e-01, -1.3647e-02, -2.3366e-01, -8.0821e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3524e-02, -4.7059e+00, -4.8283e-01, -2.0641e-01, -1.9281e-01,
        -1.5129e-01, -4.9576e-02,  2.5353e-02,  1.7013e-02, -1.3562e-01,
        -5.3351e-02, -9.7360e-02,  4.1201e-02,  3.7195e-02, -1.0255e-02,
        -3.6214e-02, -1.8324e-02, -1.2997e-01, -5.2109e-02, -1.1022e-01,
        -5.4540e-02, -5.4805e-02, -7.0308e-03, -2.2054e-03, -5.3890e-02,
        -1.4808e-02, -2.3427e-02, -3.5048e-02, -1.3788e-03, -5.6095e-02,
        -4.7338e-03,  7.4029e-02,  1.1153e-01,  1.3132e-02, -1.6828e-01,
        -5.8703e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0235e+00,  4.4487e+00, -6.0719e-02, -1.9606e-01,  1.1189e-01,
        -4.1607e-01, -2.5995e-01, -2.3986e-01,  1.7281e-02, -1.7726e-01,
        -7.6477e-02,  3.7305e-02, -4.0944e-02, -2.4302e-02,  1.0982e-01,
        -1.9908e-01, -2.6146e-03, -9.9652e-04,  9.8749e-02, -8.6034e-02,
         7.4281e-02, -3.1395e-02, -2.9763e-02, -1.5457e-01, -1.1375e-01,
         1.3685e-02, -1.7316e-01,  4.0394e-02, -1.7232e-01, -1.5055e-01,
        -3.3858e-01,  2.1383e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4570e+00, -4.5216e+00, -3.8071e-04,  3.2609e-02,  3.4150e-01,
        -4.8541e-02,  2.4373e-01,  2.2821e-01,  1.4194e-01,  6.5725e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0523, -10.7561,   0.0561,  -0.5920,   0.1915,   0.0581,  -0.4477,
          0.1439,  -0.0187,   0.0707,   0.0994,   0.6585,   1.6301,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.0255, -4.5700,  0.1940, -0.1590, -0.3221,  0.1561, -0.1453, -0.3120,
         0.0688,  0.1299, -0.0927,  0.0558, -0.2053, -0.1126,  0.1672,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8771e-01,  8.5173e+00,  6.9856e-01,  1.0272e+00,  3.2707e-01,
         1.8449e-01, -1.2101e-01, -6.9025e-02, -1.6259e-03,  2.0661e-01,
         1.0477e-01,  7.9745e-02, -6.8361e-01, -5.5480e-01,  2.2136e-01,
         5.3855e-01,  3.6186e-01, -1.6456e-02, -9.6738e-03, -1.7773e-02,
         7.4162e-02, -2.7506e-01, -8.0306e-01, -1.6426e-01,  1.9615e-01,
        -1.0742e-01,  8.9326e-02,  2.2244e-01, -8.8262e-02, -3.2359e-01,
         6.6538e-02,  2.7341e-02,  2.0276e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6373e-02,  1.0208e+01,  6.8133e-02, -1.5738e-01,  4.7314e-03,
         7.2046e-01,  3.3457e-01,  8.2410e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3199e-01, -2.4887e+00,  4.4868e-02, -6.6476e-02, -3.5832e-02,
         7.0846e-02,  1.3707e-01,  5.1439e-02, -2.5912e-03, -8.5758e-02,
        -6.3778e-02,  3.7718e-02,  6.1809e-02, -7.7137e-02, -4.7412e-02,
         8.3719e-02,  4.4409e-02,  4.3133e-02, -6.0309e-02, -1.4485e-04,
        -2.1071e-02, -6.2093e-02, -6.5425e-03,  3.6891e-02,  1.0501e-02,
         4.2453e-02, -1.0047e-01, -5.2933e-02,  1.0972e-01,  5.9673e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7647e+00,  9.7213e+00, -1.9158e+00, -1.1765e+00, -1.1438e+00,
         1.7415e-01, -2.8005e-01, -2.9456e-01, -3.1667e-01,  4.2347e-03,
        -1.1284e-02,  4.3890e-01, -9.1873e-01, -5.6625e-02,  3.8721e-01,
        -3.0070e-02, -8.0710e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6288e-02, -8.3101e-01,  3.5615e-02, -7.6270e-02,  7.2869e-02,
        -1.0832e-02, -9.5896e-03,  2.1948e-02,  1.2292e-02, -1.8382e-02,
        -2.7188e-03, -8.0324e-03,  2.0428e-02, -2.7078e-02, -1.1784e-02,
         5.9885e-04,  1.5093e-03,  2.7373e-03, -3.9242e-03, -1.9979e-02,
        -1.2494e-02, -1.7988e-03, -5.7673e-02, -1.2510e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9915e-01,  5.8611e+00,  4.0011e-01, -3.0491e-02,  3.2776e-01,
         1.4617e-02,  7.9268e-02, -1.3136e-01,  3.8205e-01,  1.2356e-01,
         7.2520e-03, -8.1015e-02, -8.6748e-02,  2.6091e-02,  2.7683e-02,
         4.4632e-03, -3.3406e-01, -1.3019e-01,  1.7128e-02, -6.1357e-02,
         5.5355e-02, -3.7816e-02, -2.8651e-02,  1.0073e-01,  6.7092e-02,
        -3.9935e-02,  6.8233e-02,  2.5193e-01, -6.6305e-02, -3.8870e-02,
        -6.7384e-02,  9.0762e-02, -4.6846e-04,  2.3846e-01, -6.7122e-02,
         8.3878e-04,  6.3776e-03,  3.9785e-01,  1.5306e-01,  9.3345e-02,
        -2.9537e-02,  4.6061e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6727e-01,  7.6242e+00, -7.4000e-01,  2.1330e-01,  3.6885e-02,
         1.0977e-01,  2.3241e-01, -3.3841e-04,  5.1314e-01,  2.8275e-01,
         1.2436e-01,  1.1891e-01,  3.9276e-03,  1.5447e-01,  2.1443e-01,
         1.4953e-01,  3.6917e-02,  6.6354e-02, -2.9212e-03,  9.1690e-02,
         3.9730e-02, -7.3315e-02, -4.1788e-02, -1.2106e-01,  1.0649e-01,
        -1.4506e-01, -3.3627e-03,  1.6141e-01,  2.3344e-01,  9.8938e-02,
         2.7122e-01, -1.3313e-01,  1.3507e-01,  1.1843e-01,  3.4509e-01,
        -2.7159e-02,  9.7894e-02, -3.0056e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4121, -5.8050,  0.0670, -0.6291, -0.2638, -0.1533, -0.0296, -0.0914,
        -0.1034,  0.0319,  0.2728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4191e-01,  4.6395e+00, -7.0308e-02, -4.3797e-02,  1.0218e-01,
        -7.9741e-02, -3.6161e-02, -2.4403e-02,  2.8693e-02, -2.7449e-02,
        -2.4568e-01, -1.6347e-02, -4.2902e-03, -5.8205e-02,  4.6599e-02,
         9.6310e-02, -2.0100e-01, -6.6047e-02, -3.4510e-02,  4.1418e-02,
         7.1584e-02,  1.7759e-03,  2.7638e-02,  5.1232e-02, -4.3565e-03,
        -2.0197e-03,  5.3246e-02, -4.5072e-02,  5.1456e-02, -3.2891e-02,
         1.7928e-01,  1.7264e-01,  4.5546e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4305e-01,  4.3602e+00, -3.7423e-01, -7.9548e-02, -1.5488e-01,
        -8.2520e-02, -3.9515e-04, -4.8442e-02,  5.9373e-02, -2.1490e-02,
         5.2839e-02,  7.2875e-04,  6.5781e-02, -6.8253e-02,  3.8018e-02,
        -1.7141e-01, -1.9877e-01, -5.9930e-02, -4.9371e-01, -1.2262e-01,
        -4.0393e-02, -6.7686e-02, -1.0405e-02, -4.8166e-03, -1.4783e-02,
         3.6985e-02, -2.7525e-02, -5.2962e-02, -7.3728e-02, -8.9944e-02,
         1.8493e-01, -5.0443e-02, -3.0705e-01,  2.6974e-02, -2.0636e-01,
        -9.8083e-03, -6.2766e-02, -5.9078e-02, -4.6774e-02,  5.3231e-02,
         1.0629e-01,  8.4896e-02,  6.4708e-02,  1.0756e-02,  1.9821e-02,
        -8.3468e-02, -5.8252e-03, -1.0472e-01,  2.7850e-02, -5.8118e-01,
         3.1104e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5054e+00,  3.3935e+00,  1.2737e-01, -1.0080e-01, -2.6936e-01,
         9.9763e-02,  3.1822e-02,  1.1513e-01, -1.4853e-01, -2.1270e-02,
        -3.6596e-02,  1.0116e-02, -7.0002e-02,  5.2130e-02, -5.0141e-02,
        -9.1531e-03, -2.1396e-02, -4.3785e-03,  4.8550e-03,  6.9388e-03,
        -1.7251e-01, -5.0605e-02, -4.2326e-02,  4.8307e-03,  7.9387e-02,
         6.5035e-02, -1.9197e-01, -8.2758e-03, -3.9650e-02, -4.0808e-02,
         1.4313e-02, -2.6598e-02, -5.2568e-02, -7.1839e-02, -1.1671e-01,
         3.2845e-03,  3.8139e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-4.0967e-01, -4.3143e+00,  2.2923e-01,  5.3505e-01,  7.7135e-02,
         2.4594e-02,  5.5325e-02, -1.0301e-01, -1.0827e-01, -4.5148e-03,
        -3.1478e-04,  2.5783e-02, -8.1127e-02,  2.1002e-04,  3.6007e-02,
         6.9552e-02,  4.1648e-02,  2.2517e-02, -8.1362e-02,  6.8800e-02,
         1.7310e-01,  1.8403e-01,  6.8777e-02,  4.4882e-02,  6.8601e-02,
         1.0204e-01,  7.9662e-02,  1.6724e-01, -1.4013e-01, -4.1617e-02,
        -6.0901e-02,  1.0792e-01,  5.6426e-02,  3.4588e-02,  3.4393e-02,
        -1.2652e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6442e-01, -1.0135e+01, -8.8744e-02,  2.1362e-01,  2.5178e-01,
        -1.4015e-01, -1.4158e-02,  1.9928e-01, -2.8753e-01,  1.8221e-01,
         5.7956e-01,  5.7225e-02,  5.1997e-02,  4.6834e-02,  1.1324e-02,
        -1.1940e-01,  1.2959e-02, -2.3172e-02,  3.8711e-03,  5.0791e-02,
         4.4263e-02,  9.5659e-02,  1.2130e-01,  1.4081e-01,  1.2871e-01,
        -8.9688e-02, -3.4553e-03, -1.1100e-02,  1.8570e-01,  3.7678e-04,
         4.2300e-02, -7.9512e-02, -5.5730e-02, -7.0447e-02,  4.6213e-02,
         4.4314e-02,  7.7194e-02,  1.5726e-02,  1.5698e-02, -1.2699e-02,
         1.8905e-02,  9.7500e-02,  4.8830e-02, -1.1690e-02, -1.0372e-01,
        -4.5304e-02,  5.9183e-02, -8.3159e-02, -2.8203e-02,  5.9125e-02,
         7.1173e-02,  4.0356e-02, -3.7858e-02, -2.5017e-01, -5.4683e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9120e+00, -8.3195e+00, -4.1308e-01, -2.2685e-01, -1.6510e-01,
        -2.3322e-01,  9.7632e-02,  3.4931e-02, -9.2336e-02, -5.8947e-03,
        -2.4656e-01, -5.9902e-02,  1.6483e-01,  3.5514e-01, -4.4554e-03,
         7.9161e-01,  5.1766e-02,  3.7750e-02, -1.0597e-02,  1.6626e-01,
         3.7814e-01,  2.9351e-01, -3.7272e-01, -3.3639e-01, -2.7643e-01,
        -3.3700e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1840e-02,  1.0385e+01,  7.3862e-01,  6.4129e-02, -4.6645e-02,
        -7.0198e-02,  2.5337e-01,  4.0128e-02,  4.8061e-02, -1.5588e-02,
         4.5551e-03, -7.8196e-02,  7.7509e-02, -2.9104e-01, -2.5751e-01,
         1.3811e-02,  1.5210e-01,  7.0973e-02, -2.3891e-02, -1.4379e-01,
        -1.7890e-02,  3.7139e-02,  5.3501e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1338e-01,  7.6366e+00,  8.3941e-01,  1.8629e-01,  9.7764e-02,
         1.8247e-02,  2.9451e-01,  9.6430e-03, -1.4829e-01, -7.5073e-02,
        -1.8454e-01,  2.0746e-01,  5.2751e-01, -3.2548e-02, -7.8700e-03,
        -2.2729e-01, -5.1077e-02,  6.2132e-03,  2.0281e-01,  7.6190e-02,
         6.7122e-02, -5.9045e-02, -5.2098e-02, -2.1304e-02,  2.5757e-01,
         1.1593e-01, -6.9932e-02,  3.7765e-01, -1.5679e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1676, 14.6463,  0.4991,  0.3693,  0.5361, -0.1299,  0.6275,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2074, 11.8233, -1.2278,  0.3214,  0.5772, -0.7215,  0.3687, -0.0998,
         0.0364, -0.4821,  0.1499,  0.7781,  0.1345,  0.0739,  0.2507,  0.1558,
         0.5238,  1.0345,  1.4967,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8138e-01,  6.3775e+00,  9.3341e-02,  1.7310e-01,  2.2084e-01,
        -3.3593e-01, -1.2837e-01, -9.8734e-02,  2.1538e-01,  1.1219e-01,
        -8.4590e-02,  8.4123e-02, -1.6522e-01,  3.6035e-01, -1.0501e-02,
         2.2286e-02,  1.0926e-01,  7.5278e-02, -1.9131e-02, -3.0498e-02,
         6.1069e-02,  4.1339e-03,  9.7684e-02, -1.1414e-02, -1.3152e-01,
         4.3258e-02,  2.0487e-01,  2.8218e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0071, -4.3772,  0.0713,  0.1306,  0.0709, -0.1061, -0.1632, -0.0112,
         0.0475, -0.0192,  0.0362,  0.0939, -0.0491,  0.0494, -0.0303, -0.0535,
        -0.1339, -0.0517,  0.1223, -0.0468,  0.1223,  0.1390,  0.0056, -0.0086,
        -0.0252,  0.0142,  0.0949, -0.0192, -0.1294, -0.4768, -0.2470,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3353e+00, -1.0037e+01, -2.5850e-01,  2.7000e-01, -6.0703e-01,
         6.1964e-01, -1.8598e-01,  1.1846e+00, -3.9916e-03, -1.9465e-01,
         4.6436e-01,  6.8483e-03,  6.7986e-01,  1.9551e-01,  1.4131e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3682,  4.6363, -0.2164,  0.1485, -0.0687, -0.0099,  0.1271,  0.0753,
         0.0949, -0.0816,  0.0144,  0.0486,  0.0267, -0.3118, -0.0375, -0.0438,
        -0.1681,  0.0835, -0.0060,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5426e-01,  7.7202e+00, -5.2034e-01,  1.8086e-01, -2.8368e-01,
        -1.1155e-01, -2.7747e-01, -3.7499e-02, -4.2646e-02,  7.4620e-02,
         9.0496e-02,  5.6690e-03, -3.9318e-03,  9.7534e-02, -2.1282e-01,
         3.0926e-01,  1.2274e-01,  6.3276e-03,  4.5977e-02, -6.2505e-02,
         5.3915e-02, -3.4770e-02, -6.9183e-02,  1.4097e-02,  6.4892e-02,
         5.9729e-02, -1.1467e-01,  1.1698e-01,  2.4504e-01,  2.2504e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.5128,  7.8171, -0.1940,  0.3187, -0.1912,  0.0878, -0.0139, -0.0202,
        -0.3830, -0.3278, -0.0581, -0.2217, -0.2920, -0.0812,  0.1105,  0.1618,
        -0.1159,  0.1121, -0.1036, -0.0803, -0.2038, -0.1519,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.1012, -11.2312,  -0.8536,   0.4064,   0.0574,  -0.1354,  -0.0668,
          0.0888,   0.0169,   0.2067,   0.2488,  -0.4249,   0.3523,   0.1469,
          0.0789,  -0.0683,  -0.0660,   0.0574,   0.1145,   1.2115,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.2603, -13.9864,   1.0576,  -0.5058,   0.4036,   0.1206,  -0.1069,
         -0.0464,  -0.0833,  -0.0818,  -0.1313,   0.0831,   0.0644,   0.2771,
         -0.2484,   0.0401,  -0.0737,  -0.2710,   0.1298,  -0.3294,   0.0468,
         -0.0220,   0.0771,  -0.2662,  -1.2402,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3814e-01, -7.0999e+00,  9.6624e-01,  3.8127e-01, -3.5632e-02,
         2.2654e-01, -1.6635e-01, -1.8331e-02, -1.1181e-01,  2.7376e-01,
        -2.2010e-01,  1.9564e-01,  5.1904e-02, -2.4860e-02, -2.4981e-02,
         1.6775e-01, -1.0907e-01, -7.5359e-02, -6.5808e-02,  1.2767e-01,
         1.6934e-01, -3.9119e-02, -9.9069e-02,  6.0703e-03,  7.1083e-02,
         4.3815e-02, -1.9594e-01, -1.6167e-01,  4.0606e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6751,  8.5083,  0.6844,  0.1610, -0.3451, -0.0599,  0.2498,  0.1136,
         0.4118,  0.6007,  0.0865,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7209e-04,  4.4726e+00,  3.1244e-01,  2.0492e-01,  7.6338e-02,
        -2.5379e-01,  6.6538e-02,  1.1144e-01,  8.5359e-02,  6.2389e-02,
        -1.9389e-01,  6.0228e-02,  1.8818e-01,  1.0485e-01,  3.0794e-01,
        -2.7739e-02, -1.2068e-02, -5.3122e-03,  9.3385e-02, -1.0022e-01,
         4.1824e-02,  2.5402e-01,  4.6779e-01, -4.9544e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4934e-01,  5.6649e+00, -4.7736e-01,  7.4886e-02, -1.1191e-01,
         1.3367e-02, -1.0138e-02, -1.3916e-03,  1.2879e-02,  2.0460e-02,
        -2.5255e-01, -7.5424e-02, -4.7263e-02, -4.0999e-04,  7.7375e-02,
         2.3164e-03, -5.7185e-02,  2.1436e-02, -8.2633e-02, -3.5418e-02,
         4.7195e-02,  4.4325e-03,  5.3349e-02,  1.2029e-01,  9.2798e-02,
        -1.4022e-02, -4.1751e-02,  4.9960e-02,  4.1207e-02, -7.3237e-02,
        -2.3584e-02,  9.9176e-03, -2.3474e-02, -6.2165e-02, -7.5423e-03,
        -3.2045e-02, -1.6950e-01, -1.6858e-02, -4.3347e-02,  2.5111e-02,
        -3.4571e-03,  1.2295e-03,  9.5111e-02, -1.0485e-01, -2.1752e-02,
        -8.8705e-02,  1.7328e-01,  4.8837e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8222, -7.2358,  0.4218,  0.1846, -0.1743,  0.1455, -0.0393, -0.2036,
        -0.0914,  0.0571,  0.2503,  0.2155,  0.0798, -0.0270, -0.1542,  0.2820,
        -0.2159,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8554, 12.0758, -2.0780, -0.3252, -0.0251, -0.1089, -1.5572,  0.4284,
         0.2161,  0.7042, -0.4407, -0.2490,  0.2304, -0.1198,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7460e-01,  4.7566e+00,  2.5270e-02, -2.4190e-03, -2.4383e-02,
         6.0643e-03,  1.0751e-01,  6.8083e-02,  1.3373e-01,  4.1366e-01,
        -1.8729e-02,  5.6098e-02,  1.2334e-02, -2.2554e-02, -4.1020e-02,
         1.5747e-02,  3.5333e-03, -4.4639e-04,  3.6853e-03,  1.6983e-01,
        -9.9424e-02,  8.0880e-02,  3.2471e-02,  7.7685e-02,  6.2254e-02,
        -9.0971e-02,  4.5157e-02,  6.8947e-03,  1.7801e-02, -2.1348e-02,
         1.1310e-01, -8.0784e-02,  1.8302e-01,  2.0682e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4698e-01,  6.0406e+00,  2.7957e-01,  5.1687e-01,  2.6457e-01,
         8.5514e-02,  1.4123e-03, -2.4999e-01,  3.3152e-02, -5.5823e-02,
        -5.3304e-02, -1.9308e-01,  2.8240e-03, -1.2903e-02, -8.1733e-02,
        -1.4086e-01,  4.9674e-02, -1.4278e-01,  1.2553e-02,  1.5816e-01,
         6.0228e-03,  2.6043e-02, -1.9858e-02,  4.8847e-03,  2.6404e-01,
         7.3257e-03, -1.7090e-02, -4.9176e-02,  1.2858e-01, -2.7976e-01,
        -1.4031e-02, -7.3358e-02, -2.8252e-01, -7.3127e-02,  1.3336e-02,
         5.6303e-02, -2.0020e-01,  3.3243e-01, -5.7172e-02, -4.1180e-02,
         8.1754e-03,  2.5321e-01,  1.6767e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1392e+00,  7.9600e+00,  1.0322e+00,  8.9078e-02, -6.8694e-02,
         7.7354e-03,  8.9420e-02, -1.0018e-02,  3.0520e-01, -9.0687e-02,
         8.6830e-02,  1.3170e-01,  7.7829e-02,  3.4833e-02, -3.5834e-02,
        -9.5892e-03,  8.2632e-02,  6.9309e-02, -4.0382e-02,  7.7402e-02,
        -6.4266e-02, -8.4001e-02, -1.6151e-01,  1.0442e-01,  1.6483e-02,
        -6.8838e-04,  9.4141e-02, -4.1647e-02, -1.0464e-02,  1.2819e-01,
         4.9955e-02,  5.7204e-03, -1.8376e-01,  5.9731e-02,  2.3994e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.0257,  5.0455, -0.0694, -0.3171,  0.0455,  0.0851,  0.0627, -0.0259,
        -0.2529,  0.1334,  0.0691, -0.2154, -0.0462,  0.1450, -0.0844, -0.0073,
        -0.0122,  0.0792,  0.0193, -0.0961,  0.0576,  0.0059,  0.1760,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5756e-01,  7.9660e+00,  1.1199e-01,  9.5446e-03, -3.2876e-01,
         5.2537e-02, -1.4286e-01, -2.3629e-02, -1.9209e-01, -1.2202e-01,
        -1.8604e-01, -7.7474e-02, -4.1584e-01, -7.3862e-02, -4.8604e-02,
         7.7509e-02,  8.6500e-03, -1.2610e-01,  1.4299e-01, -1.4334e-02,
         2.4214e-02, -1.3312e-01, -4.4949e-03, -1.3486e-01,  5.9120e-02,
        -8.5503e-02, -4.3529e-02,  8.1060e-02, -1.5255e-01, -3.1222e-01,
        -7.4862e-04,  2.0168e-01,  9.1003e-03,  3.8847e-02, -1.0841e-01,
         9.2662e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0361e+00,  7.2926e+00,  1.1722e+00, -2.5251e-01,  8.2270e-02,
        -4.9503e-01, -5.4995e-02,  1.2474e-01,  8.1436e-02,  6.9184e-02,
        -7.2843e-03, -1.9558e-01,  8.5706e-02,  4.0057e-01, -8.5001e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.2485, -14.8147,   0.4432,   0.2037,   0.2843,   1.8404,   0.6372,
          0.2639,   0.1098,  -0.3809,   0.5238,   0.4653,   0.2668,  -0.9779,
          0.2593,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6084e-02,  9.0918e+00, -2.1456e-02,  1.6833e-01,  3.0552e-01,
         4.7547e-02, -2.2378e-01,  4.9074e-02,  2.4274e-02, -3.1871e-02,
         1.1709e-01, -7.5935e-02,  8.1950e-01,  1.1018e-02, -2.1746e-02,
        -3.3366e-04,  4.4230e-02,  6.2155e-02,  7.9030e-02,  3.4517e-03,
         3.0940e-02, -5.9858e-02,  2.8996e-01,  4.3367e-02,  6.7370e-03,
        -2.4621e-01,  2.7698e-02, -1.2122e-01,  1.4042e-01, -4.2021e-02,
        -1.2949e-02, -5.8692e-02, -2.2954e-01,  6.7986e-02,  5.1647e-02,
         3.7136e-02, -6.3970e-02,  2.6482e-02, -5.8669e-02, -1.3229e-03,
         2.1159e-02,  2.0275e-01,  7.2342e-02, -2.4018e-02,  2.6331e-01,
         8.6044e-02, -7.5954e-03, -1.4192e-03,  1.0749e-01, -5.4659e-02,
        -1.9665e-01,  4.5166e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.1340, -12.8290,  -0.5018,  -0.1645,  -0.0416,   0.3913,   0.1366,
          0.0798,   0.0800,   0.2318,  -0.2257,   0.3770,   0.3908,  -0.2844,
         -0.2739,  -0.1047,  -0.1401,   0.7470,   0.4545,  -0.1597,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5603e-01,  5.3355e+00,  1.9214e-01,  1.5393e-01,  5.8180e-02,
         1.7599e-01,  6.0546e-04,  9.4875e-02, -7.8264e-02,  1.0202e-01,
         9.0981e-02, -5.9675e-02, -2.2800e-01,  6.7001e-03,  1.0317e-01,
         1.7102e-02,  1.0246e-02,  8.7566e-02, -4.3007e-03, -1.0598e-01,
        -1.0949e-02, -2.5668e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8912, -9.7523, -0.6208, -0.2177,  0.1604,  0.1611, -0.0722, -0.6968,
         0.4092, -0.3502, -0.1342, -0.2885, -0.0720, -0.0278,  0.3008,  0.5092,
        -0.1148,  0.1278,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6734e+00,  8.7625e+00,  1.6170e-01,  1.1992e-01, -2.5022e-01,
        -1.4376e-01, -4.1910e-01,  3.2921e-02, -1.0755e-01,  4.9298e-03,
         1.4439e-01, -1.4968e-01, -3.2080e-02,  4.8706e-02, -1.2914e-03,
         3.0904e-02,  1.3844e-01, -6.6242e-02,  6.8125e-02,  1.3504e-01,
        -3.7888e-02, -1.9620e-02,  1.6015e-02, -2.7189e-01, -1.6415e-01,
        -7.3496e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5048e-02, -8.0758e+00, -9.3821e-01, -1.5369e-01,  1.3782e-01,
        -5.4623e-02,  1.0525e-01,  1.7169e-01,  1.2751e-01, -1.5412e-01,
        -2.6330e-01, -3.6017e-03, -4.5266e-01,  5.1992e-02, -4.3210e-01,
        -7.0569e-02,  9.5067e-02, -3.4734e-01, -1.6288e-01, -1.7232e-01,
         8.7132e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3268e-01, -6.6005e+00, -7.5241e-01, -3.6199e-02, -5.6151e-03,
         1.7346e-01, -4.9363e-02,  5.8398e-03, -4.8402e-02,  3.7359e-02,
         3.3263e-02, -2.6564e-01,  6.6950e-02,  5.8798e-02, -6.1949e-02,
         6.5423e-02,  5.7659e-02, -8.0742e-02,  1.1317e-02,  3.3986e-01,
        -8.2299e-03, -2.3646e-02, -1.0810e-02,  5.5941e-03, -1.5195e-02,
         2.2035e-01, -2.2001e-03, -6.0176e-02,  4.3583e-03,  6.8669e-02,
         9.3129e-02,  3.0694e-02,  3.3448e-02,  2.2688e-02, -3.3503e-02,
         4.5196e-02,  3.7410e-02,  1.4907e-01,  1.6785e-01,  3.7826e-02,
         9.0539e-03,  2.5527e-02,  3.7349e-03,  3.5340e-03,  1.2939e-01,
        -3.7172e-01, -1.2693e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3114,  2.8877, -0.8327, -0.4421, -0.0916, -0.5880, -0.4439,  0.0120,
         0.6778, -0.2225,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([-8.8446e-02, -8.1498e+00,  1.3157e-01, -1.3241e-01, -1.1623e-02,
        -1.8310e-02, -6.6573e-03,  2.4068e-02, -7.9018e-02, -6.3594e-03,
         1.7706e-02, -4.0323e-02, -1.5148e-02, -7.6237e-02, -5.8908e-02,
        -6.7832e-03, -1.7854e-03,  2.7243e-02, -1.0133e-02,  2.0395e-01,
         5.5895e-02,  2.5137e-02,  1.1260e-01,  4.1172e-02, -8.6173e-02,
        -3.9496e-02, -7.2603e-02, -1.7580e-03,  1.0310e-01, -2.2485e-02,
        -2.9153e-02,  1.3146e-02,  6.2672e-02, -8.8051e-02, -8.5527e-02,
         1.1297e-01,  2.2298e-02,  3.0654e-02, -5.2645e-02, -1.1897e-01,
        -5.3423e-02,  4.2512e-02,  1.3037e-01, -4.8952e-03,  9.7984e-02,
        -2.6813e-01, -1.9162e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3870e-01,  6.2543e+00,  4.9420e-01, -3.1951e-02, -1.8369e-01,
         2.1713e-02,  3.0475e-02,  1.4530e-01, -2.2543e-02,  3.2618e-02,
        -8.1225e-02, -1.7525e-01,  4.4718e-02, -5.3684e-02, -7.2737e-04,
        -3.2910e-02, -3.4621e-02, -1.4259e-01, -6.7239e-02,  2.2565e-02,
         7.2958e-02, -1.1913e-01, -6.0172e-02, -9.3798e-02,  2.5625e-02,
         2.1934e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0288,  6.4610,  0.7063,  0.1949, -0.3570, -0.4840, -0.5178,  0.3964,
        -0.1469, -0.3410,  0.0709, -0.0765,  0.1001, -0.1515, -0.0799, -0.1240,
        -0.0366, -0.1403,  0.0205, -0.0371, -0.2429, -0.0854, -0.2523,  0.0844,
        -0.2982, -0.2014, -0.1865, -0.5447,  0.0477,  0.0872,  0.1009, -0.0581,
         0.0193,  0.0400,  0.1329,  0.0826,  1.3511,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0462,  3.4918, -0.5364, -0.1866,  0.0579, -0.1476,  0.0501,  0.1742,
         0.0288,  0.1172,  0.0559,  0.0103, -0.0834,  0.2904, -0.1472,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0774, -7.3452, -0.4339, -0.0798, -0.0580, -0.1214, -0.0189, -0.1865,
        -0.0150, -0.4243, -0.2892, -0.2657,  0.0677,  0.2866,  0.1133,  0.0404,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1850e-02, -8.1996e+00,  3.1754e-01,  4.4028e-01, -5.1173e-02,
         1.2005e-01,  1.5129e-01,  3.5106e-01,  1.3756e-01,  1.9170e-02,
        -1.2815e-02,  2.2809e-01, -1.4047e-01,  1.8173e-01,  2.0398e-02,
         9.4378e-02,  4.9613e-02, -3.5971e-02,  9.2842e-03, -2.5548e-05,
         9.6755e-02, -1.8570e-01, -1.6442e-02,  5.9676e-02, -1.3518e-02,
        -5.1399e-02, -1.1972e-01, -7.4260e-02, -3.6112e-01, -1.2820e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7848e-01, -8.7798e+00, -9.3598e-01,  6.0064e-02, -9.6785e-02,
        -4.7738e-02, -7.3438e-02, -7.6407e-03,  1.6928e-01,  2.8749e-03,
         7.0292e-02,  3.1464e-02,  7.6923e-02,  2.7999e-02,  2.7287e-02,
        -1.3473e-02, -5.6554e-03, -3.9767e-02, -9.7835e-02, -5.5924e-02,
        -2.0567e-02,  1.4756e-02, -3.2492e-02,  4.3258e-02,  1.0427e-01,
        -3.3294e-02, -5.2912e-02, -9.4722e-03, -3.7098e-02, -7.4719e-02,
         8.9265e-02,  1.2047e-01, -3.8431e-02,  2.7600e-02, -1.1114e-02,
        -1.1621e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1693e-01, -5.2894e+00,  5.8557e-02, -1.2730e-01, -1.5558e-01,
         1.8711e-01,  3.4465e-02,  1.5052e-03,  1.2671e-01, -7.2378e-02,
         1.0161e-01, -9.2113e-02,  6.6143e-02,  9.7441e-03, -9.2190e-02,
        -3.9719e-02,  6.6287e-02,  4.5968e-02, -7.7420e-02,  4.2483e-02,
        -2.0814e-02, -1.2237e-01,  7.2096e-03, -5.8213e-02,  2.2561e-02,
        -4.8272e-02, -7.5738e-02, -6.6949e-03,  2.9370e-02,  6.6909e-03,
        -7.8151e-02,  3.5084e-02,  1.8083e-01,  1.6258e-01, -7.1192e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5624, -6.8793, -0.7655, -1.3834, -0.6188,  0.2436, -0.0154,  0.2432,
        -0.7464,  0.2810, -0.6361,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8812, -6.8177,  0.6548,  0.4254,  0.0134,  0.3150,  0.2682,  0.0977,
        -0.1596,  0.0840,  0.1472, -0.1436, -0.0120, -0.3334, -0.0180,  0.1758,
         0.1860, -0.0636,  0.0649,  0.0156,  0.2807, -0.0937,  0.0383,  0.5952,
         0.4308,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0277,  6.2726, -0.1053, -0.1691,  0.4458,  0.3952,  0.0260, -0.0716,
        -0.1170,  0.0748,  0.0897, -0.1286, -0.0466,  0.0538,  0.1128,  0.2424,
        -0.4230, -0.0081,  0.0631, -0.0433, -0.2442,  0.2917,  0.2805, -0.3431,
         0.8013,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0472e-01,  6.0695e+00,  2.3716e-01,  2.6839e-02,  2.1176e-01,
         1.1836e-01, -1.0654e-02,  9.6109e-02,  4.6748e-02,  8.8771e-02,
         2.9365e-02, -1.2613e-01, -7.5358e-03, -3.0567e-02,  3.6939e-02,
        -6.8154e-02, -9.2931e-02,  6.2640e-02,  1.6852e-02, -1.1468e-02,
         2.3087e-02,  2.2614e-02, -4.9411e-02,  1.6209e-02,  1.9275e-01,
         1.2799e-01, -5.9924e-02, -9.3968e-02, -2.6191e-03,  1.0820e-01,
         1.2011e-02, -2.3108e-01,  1.0919e-01,  2.1940e-02,  8.7952e-02,
         3.0110e-02, -1.7823e-02, -1.9771e-02,  1.1078e-01, -7.1298e-03,
        -6.0247e-02, -2.4144e-02, -1.1709e-01, -6.3604e-02,  5.9777e-02,
         3.6365e-02, -1.8646e-02, -2.1083e-02, -3.7700e-02, -8.4877e-02,
        -2.2200e-01, -2.9208e-02,  3.2531e-01,  6.2488e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.1885, -6.1527,  1.0318,  0.0838,  0.1515, -0.0983,  0.2297,  0.1248,
         0.0577, -0.0821,  0.1163,  0.0309,  0.2075,  0.1151,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2545,  6.2917,  0.7022,  0.3681,  0.0664,  0.3644,  0.3099,  0.5609,
         0.1840, -0.2744, -0.0543,  0.0514, -0.0499,  0.1240,  0.0635, -0.0878,
        -0.0802,  0.1035, -0.0367, -0.2638, -0.0167,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1494, 14.7522,  0.8950, -0.3031, -0.1658, -1.0601, -0.9648, -0.3507,
        -0.3000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1620e-01,  8.1166e+00,  9.6156e-01, -1.6892e-01, -6.5766e-03,
         1.4339e-01,  1.1960e-01, -4.1839e-02,  2.9769e-02,  1.8039e-01,
         1.0879e-01, -8.4817e-02, -1.7243e-02, -5.1873e-02,  2.0426e-02,
         1.6483e-01, -5.5815e-02,  9.9921e-02,  1.3070e-02,  1.9660e-01,
         5.3390e-03,  7.3616e-02, -1.0021e-02,  1.4489e-01,  2.6706e-02,
        -7.9367e-02,  3.3779e-01, -5.3582e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2545e-01, -3.4439e+00, -3.3293e-01,  1.0226e-01, -4.8626e-02,
        -1.6617e-02,  4.0435e-02,  2.0747e-01, -8.7992e-03, -8.3669e-02,
        -1.5987e-01,  1.2370e-01,  1.5687e-03,  1.9304e-02,  3.4203e-02,
        -1.5032e-01, -4.9320e-02, -8.2694e-02, -9.7157e-02,  1.0706e-01,
        -6.3262e-02,  3.2615e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0611e-03,  7.3838e+00, -7.6589e-01,  9.5361e-02,  5.8543e-02,
         5.8742e-02,  1.4160e-01,  1.1108e-01, -2.3833e-01,  3.3272e-01,
         1.1471e-01, -8.9319e-02,  2.2815e-01, -7.0502e-02,  4.2153e-02,
        -2.2254e-02,  4.9765e-02,  5.7529e-02, -3.9549e-03,  1.1255e-01,
        -6.2715e-03, -3.1750e-02,  2.4377e-02, -1.6647e-02,  7.3259e-02,
         1.2790e-01, -5.5254e-03,  1.8547e-01,  1.5787e-01, -1.3705e-01,
         1.0365e-02,  6.2220e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0767, -4.7316, -0.6312,  0.0517, -0.0263, -0.4151, -0.0322,  0.0782,
         0.0398,  0.1847,  0.0264, -0.0117,  0.0552, -0.0369,  0.1292,  0.0370,
        -0.0269, -0.0115,  0.0273,  0.0246, -0.0288, -0.0555, -0.0180,  0.1303,
        -0.0236, -0.0713, -0.1634, -0.0418, -0.0140,  0.0217,  0.0839, -0.0974,
         0.0099, -0.0076, -0.0569, -0.0804, -0.0558, -0.0739, -0.0872, -0.0261,
         0.0495, -0.0589, -0.1269, -0.0352], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9704e+00, -1.1397e+01, -8.8394e-01,  2.0243e-01, -2.3493e-01,
        -4.2764e-01,  2.2077e-01,  4.9910e-02, -3.4358e-01,  4.2405e-03,
        -1.1936e-01,  3.6514e-01, -7.8668e-02, -3.8151e-01,  3.5599e-01,
         8.1359e-01,  2.8642e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5422,  9.8568,  0.0949,  0.3964, -0.1316, -0.3884, -0.0204,  0.1299,
        -0.0800,  0.2750,  0.1089,  0.0291, -0.0323,  0.0841,  0.0353, -0.0794,
         0.0526, -0.1641,  0.0653,  0.0507, -0.0457, -0.1062,  0.1823, -0.2267,
        -0.6139,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7639e-01, -8.0101e+00, -7.4487e-01, -1.5659e-01,  1.3395e-02,
         2.5920e-01,  8.3978e-02, -2.5023e-01,  1.7422e-01, -2.6985e-01,
        -1.9049e-01, -1.2708e-01,  1.7637e-02, -2.1778e-03,  9.2940e-03,
        -2.0751e-02, -1.2630e-01, -1.5916e-01,  1.0141e-01, -4.3937e-01,
         2.6457e-01,  1.2387e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1654,  6.4820, -0.0895,  0.8043, -0.1924,  0.4556, -0.0662, -0.1941,
         0.7414,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0814,  8.5074,  0.2320, -0.4407,  0.0388,  0.0557,  0.1779, -0.1489,
         0.1104,  0.0650,  0.0169, -0.0934, -0.2584, -0.2839,  0.0444, -0.0773,
        -0.0436,  0.1425,  0.0373, -0.1571,  0.2010,  0.1373, -0.0593, -0.1624,
        -0.0727,  0.0800,  0.0349,  0.1644, -0.1637, -0.0657,  0.1624,  0.0998,
         0.0641,  0.2774,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
