Iter #50: [tensor([-0.4989,  0.1718,  0.0349, -0.2173, -0.2772, -0.1908, -0.0679, -0.3094,
        -0.1483, -0.1444, -0.0951,  0.1871,  0.1476,  0.1664, -0.1598, -0.0084,
        -0.0366,  0.1446, -0.0124, -0.1254,  0.1274, -0.1535, -0.0177,  0.3008,
        -0.5892,  0.7101,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0116, -0.0261,  0.0279,  0.0035, -0.0205, -0.0300, -0.0329,  0.0069,
         0.1444, -0.0183,  0.0053, -0.0344,  0.0108,  0.0070, -0.0383,  0.0277,
        -0.0292, -0.0472, -0.0162,  0.0265,  0.1656,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4038, -0.1061,  0.1128,  0.0478, -0.0016,  0.0462,  0.0194, -0.0808,
         0.0484,  0.0392, -0.0137,  0.0392,  0.0229,  0.0124,  0.0007,  0.0580,
        -0.1027, -0.0323,  0.1625,  0.2023,  0.1158,  0.0088,  0.0661,  0.0230,
        -0.3591,  0.1457,  0.1414, -0.0184,  0.0365, -0.0108,  0.1159,  0.0529,
         0.0035, -0.1400, -0.0426,  0.0315,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1565, -1.5665, -0.4361, -0.0771, -0.1733, -0.4465,  0.2445, -0.2517,
        -0.0801, -0.0291,  0.0397, -0.1903, -0.2542, -0.2665,  0.0162,  0.0984,
        -0.2158,  0.2462,  0.1264,  0.1980,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1845, -0.2462,  0.0480, -0.0377,  0.1398,  0.1026, -0.1139, -0.0292,
        -0.0469, -0.0525, -0.1050,  0.1703, -0.0691, -0.1116, -0.0748,  0.0622,
        -0.1309, -0.2108, -0.2686,  0.2013,  0.4361,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0059, -0.2986,  0.1681, -0.1475, -0.1020,  0.1260,  0.0697,  0.0035,
         0.0629, -0.0119,  0.0819, -0.0592, -0.2667,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6451, -0.4855, -0.3620,  0.4919, -0.3175, -0.3803, -0.1182,  0.0601,
        -0.0014,  0.2209,  0.1326, -0.0558,  0.1506, -0.1157,  0.0126, -0.1669,
        -0.0051, -0.1514,  0.0393,  0.1655, -0.1960, -0.1796, -0.4341, -0.3373,
         0.0191, -0.0312,  0.2936, -0.0399,  0.0145, -0.3723, -0.3673, -0.0485,
         0.0503,  0.0767, -0.0842, -0.1415, -0.3624], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1318, -0.0229,  0.0793,  0.0670,  0.1497, -0.0406,  0.0314,  0.1831,
         0.0523,  0.0867, -0.0143,  0.0262, -0.1380,  0.0088, -0.0155,  0.0147,
        -0.0085,  0.0207,  0.0462, -0.0123, -0.0434, -0.0185,  0.0273,  0.0302,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8304,  0.0144,  0.1583,  0.4295, -0.4615, -0.4434,  0.0932,  0.1877,
        -0.0756, -0.4741,  0.0895,  0.1037,  0.1753,  0.0640, -0.0934,  0.1736,
         0.2617, -0.2830, -0.0397,  0.1808, -0.4352, -0.3409, -0.0426,  0.0710,
        -0.0513,  0.0562, -0.0613,  0.0956, -0.1116,  0.1583, -0.2426,  1.4351,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1118,  0.0393, -0.1158,  0.0189,  0.0753,  0.0740,  0.1260, -0.0842,
        -0.0495,  0.2683, -0.0034,  0.0434, -0.0594,  0.0597,  0.0725,  0.0682,
        -0.0250,  0.0289, -0.1958, -0.1473,  0.2393, -0.3196,  0.0685,  0.1447,
        -0.1129, -0.0796, -0.0375,  0.1761, -0.0191,  0.3920, -0.0618,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2155,  0.0077,  0.2388,  0.1288,  0.3287,  0.0853,  0.0458, -0.1129,
         0.0233,  0.0006, -0.1436,  0.0516,  0.0413,  0.0637,  0.0129, -0.0138,
         0.0021,  0.0762,  0.0383,  0.0500,  0.0335, -0.2442,  0.1756,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1419, -0.0319, -0.1451,  0.0543, -0.0220,  0.0450, -0.0547, -0.0424,
        -0.0404,  0.1367, -0.0902,  0.2607,  0.0614, -0.3570, -0.2874, -0.0237,
        -0.1917, -0.0277, -0.0286,  0.0297,  0.0460, -0.2105,  0.0953,  0.1502,
         0.1702,  0.2013, -0.6636, -0.3223,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.7248, -1.1238, -1.5500, -0.4008,  0.4124, -0.4538,  0.3884, -0.2040,
        -0.9649, -0.6860,  0.6178, -0.4996, -0.7650, -0.2955, -0.0648,  0.6726,
         0.4304, -0.2407,  0.4534, -1.0686, -1.1409, -0.4809,  0.9899,  0.3536,
        -0.3690, -0.5105,  1.3892, -4.2043,  1.5917,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5701, -0.1791, -0.0738,  0.1006,  0.4958,  0.0303,  0.1185,  0.0961,
         0.5467, -0.6194,  0.0715, -0.0685, -0.1624,  0.2519,  0.7738,  0.2499,
         0.0518, -0.1386, -0.2631, -0.6616, -0.9126,  0.4819,  0.1129, -1.7113,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7132e-01,  5.8817e-01,  2.0410e-01,  3.3773e-01, -2.3585e-04,
         2.2513e-01,  1.2940e-02,  2.0325e-01,  2.1699e-02,  3.4319e-01,
         1.1764e-01, -1.8264e-01,  2.2473e-01,  8.6356e-02,  2.4876e-01,
         9.0439e-04, -7.7575e-02, -4.7134e-02,  2.5416e-01, -3.5134e-01,
        -4.1247e-01, -8.9588e-02, -5.1793e-02, -4.6401e-02, -2.1517e-02,
         9.1652e-03,  1.3420e-02, -1.7585e-01,  1.5570e-01, -3.8101e-01,
         1.4240e-01, -1.8115e-01, -3.4328e-01, -4.2902e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4563,  0.2797, -2.0278, -0.3532, -0.3872, -0.7042, -0.7927, -0.3940,
        -0.5184, -0.1265, -0.2415, -0.0625, -0.1460, -0.2905, -0.2687, -0.6647,
        -0.7345, -0.1313,  0.3268, -0.3474,  0.3005, -0.6243,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4265, -0.4615,  0.4185,  0.3211,  0.1142,  0.2318, -0.0311, -0.7290,
         0.1131, -0.0676, -0.0642, -0.1949, -0.1232,  0.3046,  0.2208,  0.1055,
        -0.8221,  0.8508,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2014,  0.0199,  0.0642,  0.0452, -0.0486,  0.0159, -0.2041,  0.0368,
        -0.0471, -0.0399, -0.0238,  0.0046,  0.0030, -0.0411, -0.0345, -0.1715,
         0.0594,  0.0090, -0.0969, -0.0122, -0.0154,  0.1875, -0.0871,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7129,  0.5704, -2.7007, -1.3261, -0.2392,  1.3865,  1.7186,  0.9644,
         0.8626,  1.2382,  0.3341,  0.3185, -0.1635, -1.6562,  0.4681, -0.1240,
         0.3904,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7057,  2.3033, -0.8506,  0.1069, -0.9555,  3.8878,  3.7137,  3.3615,
        -1.6460,  1.5318,  0.6747,  0.2673,  1.5743,  0.8699, -1.2178, -0.6051,
         0.9376, -1.6093,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6857, -0.0538,  0.2596,  0.0302, -0.1696, -0.0299, -0.0794, -0.2421,
        -0.1111,  0.0013, -0.0657, -0.0209,  0.0390, -0.4193, -0.0355,  0.0373,
        -0.0226,  0.1822,  0.1922, -0.0081,  0.0217, -0.0758,  0.0130, -0.0736,
        -0.5381,  0.4469,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2364,  0.1050,  0.0567,  0.0162,  0.1626,  0.5202,  0.0084,  0.0179,
        -0.0656,  0.0250,  0.0382,  0.0246,  0.0261, -0.0430,  0.0357, -0.0158,
        -0.1375, -0.0746,  0.0099, -0.0152, -0.0220,  0.1028,  0.0182,  0.0172,
        -0.0364, -0.1145,  0.0374, -0.0306,  0.1582,  0.0302, -0.0458,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0010, -0.2104, -0.0467,  0.0667, -0.0679, -0.0980, -0.0550, -0.0848,
        -0.1672, -0.2877, -0.0381, -0.0712, -0.1222,  0.0597,  0.2469,  0.2329,
        -0.3568, -0.2347,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6854, -0.3163,  0.9971, -0.4451, -0.5611, -0.0461,  0.6649,  0.0902,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0004,  0.0554,  0.0658, -0.0754, -0.1028, -0.0008, -0.0598,  0.0275,
         0.0533, -0.0147, -0.0046, -0.1222, -0.0060,  0.0393, -0.0003, -0.0285,
        -0.0094,  0.0686, -0.0022, -0.0594, -0.0026, -0.0240,  0.0556, -0.0088,
         0.0020, -0.0289,  0.0536,  0.0466,  0.0979, -0.0388,  0.0541, -0.0543,
        -0.0307, -0.0350,  0.0190, -0.0948, -0.0225,  0.0597,  0.0416,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1404,  0.0334,  0.0826,  0.0453, -0.0452, -0.0206, -0.0434, -0.0041,
        -0.0027, -0.0166, -0.0037, -0.2106, -0.0349, -0.0192,  0.0427,  0.0073,
         0.1074,  0.0073,  0.0313, -0.1459,  0.2363,  0.0140, -0.0359,  0.0173,
        -0.1070, -0.0750,  0.2815,  0.0572,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1549,  0.2101,  0.1236,  0.0444, -0.0140,  0.0624,  0.1110, -0.0896,
         0.0132,  0.1484, -0.1806, -0.1421,  0.0005,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1113, -0.3197,  0.0185, -0.1815, -0.0598, -0.0674, -0.1535,  0.1445,
        -0.0149, -0.1002, -0.0199,  0.1097,  0.0743,  0.2659, -0.2764,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5604e-01,  4.3074e-02,  7.1187e-02,  1.5444e-01, -1.8454e-02,
         8.6437e-02,  3.3207e-02, -5.1471e-02,  1.4486e-02, -5.9546e-02,
        -2.9992e-02,  1.2441e-01,  1.5519e-02, -1.8701e-02,  9.5778e-02,
         2.7190e-02, -8.2339e-02,  1.4290e-01,  2.1431e-02,  9.9596e-02,
         1.2078e-02,  5.3837e-02, -4.5123e-03,  1.0278e-01,  1.3447e-01,
        -1.3030e-01,  4.4666e-02,  4.1668e-02,  6.4221e-02,  7.5675e-02,
         6.9002e-02, -5.7123e-02,  7.4332e-02, -8.6690e-03,  7.5620e-03,
         3.8953e-02, -1.4566e-01, -1.7025e-05,  1.8866e-02, -6.6725e-02,
         4.1573e-02,  8.4638e-02, -3.9537e-02, -7.8485e-03, -1.0962e-01,
        -2.0310e-01, -4.2549e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1494, -0.1513, -0.0147,  0.1054, -0.0243, -0.0821, -0.0185,  0.0055,
         0.0980, -0.0407,  0.1418,  0.1253,  0.0129, -0.0183,  0.0015, -0.0196,
        -0.0206,  0.0316, -0.0658,  0.0057,  0.0182,  0.0071, -0.0082,  0.0484,
        -0.0706, -0.0241, -0.0605, -0.0060, -0.0461,  0.0197, -0.0338,  0.0052,
         0.0059, -0.0173,  0.0268,  0.0119, -0.0261, -0.0424, -0.0027,  0.0204,
        -0.0234, -0.1668,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0566, -0.0862, -0.1497,  0.0155, -0.0411,  0.1439,  0.2766,  0.0920,
        -0.0474,  0.0908,  0.2404, -0.0656, -0.0036, -0.0140,  0.0720, -0.0881,
        -0.0788, -0.0599, -0.0413, -0.0809, -0.0477,  0.0072,  0.0561,  0.1473,
         0.0684,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0380,  0.0026,  0.0495,  0.1205,  0.1751,  0.1698,  0.2757,  0.0828,
        -0.0025,  0.0161,  0.0370, -0.1455,  0.0644, -0.1661,  0.0609,  0.0554,
         0.0523,  0.1276,  0.3413,  0.5573, -0.4007, -0.4974,  0.6357,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5133,  0.2713,  0.1267, -0.0181,  0.0505,  0.1690,  0.0020, -0.0409,
        -0.0370, -0.0755, -0.4130, -0.1379, -0.0423, -0.0317,  0.0478, -0.0224,
        -0.2593, -0.0279,  0.0277, -0.0794, -0.1955, -0.4043,  0.0914,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0341,  2.1848, -0.5149, -0.3187,  0.8408,  0.2702,  0.8613,  2.0459,
        -0.5280,  1.3876,  0.2049,  2.5261,  0.4919,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3388e+00,  9.5134e-01,  1.6124e-01, -1.1499e-01,  2.1197e-03,
         1.7358e-01,  2.9111e-01, -6.4377e-01, -3.3043e-01, -7.2217e-01,
         1.2652e-01,  1.3622e+00, -2.0232e-01, -3.4850e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0105,  3.4627, -2.5418,  4.6303,  4.0879,  3.4721,  1.2004,  3.9325,
        -1.6778,  2.1513,  0.3353,  2.6946,  0.8185,  1.6639,  1.2784, -5.4441,
         8.5127, -0.6288, -3.0339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.2426,  0.0046, -0.0371, -0.0798, -0.0452, -0.0813,  0.0801,  0.0679,
         0.0215,  0.1089,  0.0808,  0.0955,  0.0247,  0.0255, -0.0074,  0.1221,
         0.1150,  0.0426, -0.0715,  0.0126, -0.0568,  0.0068, -0.0585, -0.0590,
        -0.3105,  0.1285,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1103,  0.0649, -0.0079, -0.0238, -0.0077, -0.0309, -0.0467,  0.0422,
         0.0019,  0.0385,  0.0222, -0.0293,  0.0633,  0.0416, -0.1132, -0.0877,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2887, -0.0466,  0.0219, -0.1480, -0.0786, -0.1891, -0.1357,  0.0482,
        -0.0800,  0.0626,  0.0067,  0.2066, -0.0326,  0.0420,  0.0520, -0.0555,
        -0.2642,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0798e-01, -1.4776e-02, -3.7052e-01,  3.2044e-04,  3.3067e-02,
         1.9604e-02,  1.3403e-02,  3.5974e-02, -6.0268e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1826, -0.0230,  0.0197,  0.0559,  0.0723, -0.1833,  0.0669, -0.0361,
        -0.0555,  0.0283,  0.0225, -0.0316,  0.0903,  0.1703,  0.0415, -0.0280,
         0.0105, -0.0017, -0.0315, -0.0955, -0.0277,  0.0411,  0.0633,  0.0079,
         0.0116,  0.0130, -0.2030,  0.1493, -0.0165,  0.0206,  0.0188,  0.0738,
         0.0538,  0.0480, -0.0170,  0.0687, -0.0369, -0.0757,  0.0771,  0.0074,
         0.0056,  0.1028, -0.0433,  0.1326,  0.0868, -0.1984], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2576, -0.0906, -0.0923, -0.1036,  0.0054, -0.0203,  0.0210,  0.0712,
         0.0320, -0.0539, -0.0924, -0.1066, -0.2178, -0.0523,  0.0199, -0.0640,
        -0.0252,  0.0156,  0.0378,  0.0405,  0.0877,  0.0281, -0.0381,  0.0018,
        -0.0112,  0.0527,  0.0369, -0.0348, -0.1818, -0.1990,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4420,  0.1469,  0.2901,  0.5732,  0.2200,  0.0234, -0.1033,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1419,  0.0076, -0.0891,  0.0797,  0.0767, -0.2305,  0.0068, -0.0056,
         0.0989, -0.0877, -0.1145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1779, -0.0348, -0.1182, -0.1379, -0.0282,  0.0774,  0.2463, -0.0278,
         0.3838,  0.0938,  0.0052,  0.0800,  0.2909,  0.1449,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0638,  0.0253,  0.1310,  0.2659,  0.0297, -0.0640, -0.0025, -0.1470,
         0.0682, -0.1164, -0.0842,  0.2288,  0.0665, -0.0687, -0.0013, -0.1241,
         0.0143, -0.2308,  0.0807, -0.0086, -0.0115, -0.0798,  0.1883, -0.0699,
         0.1094,  0.1635, -0.1908, -0.1604, -0.3217, -0.1843,  0.0171, -0.0368,
        -0.2321,  0.1246,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1538,  0.1563,  0.1005,  0.0810, -0.0110, -0.0319,  0.1037, -0.1037,
        -0.0047, -0.0004,  0.0266,  0.0084,  0.0003, -0.0053,  0.0211,  0.0343,
        -0.1732,  0.0266, -0.0275, -0.0074,  0.2317,  0.0169,  0.0839,  0.0089,
        -0.1108, -0.0283,  0.2357, -0.0658,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0811,  0.0089, -0.0266, -0.0397, -0.0137,  0.0456,  0.0187, -0.0191,
        -0.0053, -0.0549, -0.1293, -0.0433, -0.0358, -0.0243,  0.1587, -0.0628,
         0.0603, -0.0115, -0.1225, -0.0456, -0.0812, -0.0301,  0.1281, -0.0347,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.3558,  0.5508,  0.2682,  0.0468,  0.0354, -0.0406,  0.0477, -0.0886,
        -0.0946, -0.0158,  0.0104, -0.0922,  0.0747, -0.1406,  0.0055, -0.0017,
         0.1040,  0.0133,  0.0889, -0.1016, -0.0036,  0.0738, -0.0087,  0.0287,
        -0.0112,  0.0849,  0.0150, -0.0197,  0.1421, -0.6808, -0.1432,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0295,  0.1074,  0.1368,  0.0936,  0.0567,  0.1679, -0.0849, -0.0608,
        -0.6798, -0.1711,  0.2275,  0.3485,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0116,  0.1830, -0.1192,  0.0082,  0.0377,  0.0462,  0.0746,  0.0834,
         0.0267, -0.1343, -0.0488,  0.0055, -0.0349,  0.4248, -0.1097,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0536, -0.0903,  0.0032, -0.0601, -0.1645,  0.1305, -0.1069,  0.0069,
        -0.0130,  0.0555,  0.0420,  0.0255,  0.0063,  0.0308, -0.0381,  0.0224,
        -0.0200,  0.0218, -0.0395, -0.0732,  0.0270,  0.0079,  0.0352,  0.0088,
        -0.0329,  0.0084, -0.0134, -0.0059, -0.0032,  0.0004, -0.0341,  0.0445,
         0.3064], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5239, -0.0006, -0.2428,  0.0357, -0.0493,  0.0343,  0.0327,  0.0267,
         0.1194,  0.0621, -0.0612,  0.0403,  0.0554,  0.0206,  0.3227,  0.2064,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 11.9111,  -2.5485, -13.1100, -13.0394,  -3.2625,   1.7142,  -2.4446,
         -6.5080,  -0.0965,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4036,  0.2463, -0.0491,  0.4265, -0.1794,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1744,  0.0260, -0.0661, -0.0659, -0.1295, -0.0435, -0.0576,  0.0203,
         0.0959, -0.1867, -0.2111, -0.0807,  0.1863, -0.1638, -0.0045, -0.0735,
         0.0132, -0.1183, -0.0271, -0.0390,  0.0307, -0.0639, -0.0265, -0.0529,
         0.0308,  0.2472,  0.0831,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5423, -0.3176, -0.2306, -0.3781, -0.3625, -0.1580,  0.0380,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0457, -0.2963,  0.1200, -0.0072, -0.0325,  0.0166, -0.0737, -0.0490,
        -0.0426,  0.0009, -0.0257,  0.0113, -0.0599,  0.0833,  0.0422, -0.0389,
        -0.0126, -0.0061, -0.0758, -0.1650, -0.0928, -0.0878,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0394,  0.0347,  0.0317,  0.0325, -0.1076,  0.0175,  0.0381, -0.0636,
         0.0216, -0.0470,  0.0431, -0.0270, -0.0134, -0.0579, -0.0046,  0.0994,
         0.0383,  0.0045,  0.0637,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2085, -0.2097, -0.0489, -0.0802, -0.0509, -0.1725,  0.0910, -0.0659,
        -0.0523,  0.0244, -0.0231,  0.0108, -0.0672, -0.0586, -0.0024, -0.0540,
        -0.0007, -0.0131, -0.0083,  0.1274,  0.0139,  0.0697,  0.0106, -0.0157,
        -0.0299,  0.0482, -0.0049, -0.0445, -0.1417,  0.2192,  0.0010,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2393, -0.0285,  0.0407,  0.0831,  0.0503,  0.0420,  0.0044,  0.0815,
        -0.0964,  0.0414,  0.0427, -0.0040,  0.0574, -0.0013, -0.1214, -0.0783,
         0.4356,  0.0118, -0.1810, -0.0132, -0.0413,  0.0388, -0.0841, -0.0427,
        -0.2378, -0.0144, -0.1817, -0.1011, -0.0016,  0.0373,  0.3426, -0.0728,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5141, -0.2897,  0.0752, -0.0520,  0.0615,  0.0997, -0.0701, -0.0510,
         0.2650, -0.0516, -0.1794,  0.1064,  0.0048, -0.5039, -0.1115,  0.0266,
        -0.0063, -0.0235, -0.0675, -0.1240,  0.3642, -0.0468,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0632,  0.0111, -0.0210,  0.0491, -0.0428, -0.0368, -0.0626, -0.0390,
        -0.0568,  0.0658,  0.0219,  0.0119,  0.0137, -0.0358,  0.0616, -0.0018,
        -0.0530, -0.0070,  0.0161, -0.2339, -0.0169,  0.1145,  0.0202, -0.0075,
         0.0244, -0.1712, -0.0584, -0.0660,  0.0312,  0.1137,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4408, -0.0782, -0.1828,  0.1430, -0.0388, -0.0433, -0.0568,  0.0091,
        -0.0909,  0.0212, -0.0538, -0.0855, -0.0837,  0.1252,  0.0973, -0.2905,
        -0.5107, -0.2468,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0519,  0.0121, -0.0216, -0.0159, -0.0361, -0.0313, -0.0466, -0.0209,
        -0.0890,  0.0249, -0.0810,  0.0624, -0.0599, -0.0094, -0.0923, -0.0265,
        -0.0272, -0.0794, -0.0170, -0.1079,  0.0383,  0.0599, -0.0622, -0.0061,
        -0.0319,  0.0169, -0.0256, -0.1878,  0.0546,  0.0201, -0.0346, -0.0548,
        -0.0136, -0.0210, -0.0234, -0.0419, -0.1268,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0033,  0.0202,  0.0035,  0.0802, -0.0503, -0.0545,  0.1191,  0.0196,
         0.0009,  0.0011, -0.0209, -0.0663,  0.1093, -0.0143,  0.0896,  0.0147,
         0.3900, -0.0756, -0.0846, -0.0119,  0.1940,  0.0935, -0.0652, -0.0186,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1026,  0.0177, -0.0185,  0.0114, -0.0165,  0.0136, -0.0679, -0.0046,
        -0.2413, -0.0130, -0.0216,  0.0270, -0.0620,  0.0011,  0.0296,  0.0122,
         0.0023,  0.0349,  0.0167,  0.0110, -0.0017,  0.0044, -0.0091, -0.0344,
        -0.0851, -0.0189, -0.0190,  0.0024, -0.0212,  0.0547, -0.0018, -0.0721,
         0.0126, -0.0176, -0.0266, -0.0153, -0.0245, -0.0285,  0.0183, -0.0211,
        -0.0204,  0.0099,  0.0145,  0.0052,  0.0502, -0.1732,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0655,  0.0594,  0.0732,  0.0036, -0.0667, -0.0524,  0.0427, -0.0094,
         0.0059, -0.1297,  0.0800,  0.0120, -0.1761, -0.0190, -0.0256,  0.0038,
        -0.0964, -0.0473,  0.0023,  0.0146,  0.0215,  0.1364,  0.0868,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0379,  0.1165, -0.1205, -0.0565,  0.0164,  0.0917, -0.1303, -0.1136,
        -0.0412,  0.2743,  0.0578,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2750, -0.0488, -0.0564,  0.0971,  0.1565,  0.2002,  0.2446, -0.0139,
         0.0195, -0.1320, -0.3031,  0.0673,  0.3694,  0.0316,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2023,  0.3063,  0.1566,  0.0393, -0.0226,  0.0403,  0.0982,  0.0511,
         0.0679,  0.0023, -0.1447,  0.2215,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3636, -0.0169,  0.1139,  0.1916,  0.1661, -0.0193,  0.0403, -0.0945,
        -0.0108, -0.1656, -0.0144, -0.2876,  0.0290, -0.0230, -0.0590,  0.0436,
         0.0103,  0.1586,  0.0102,  0.0838,  0.0235,  0.0474, -0.0633,  0.0740,
        -0.1691,  0.0892,  0.0613, -0.0629, -0.1564,  0.1331, -0.1533,  0.0769,
         0.0529, -0.0497,  0.0307, -0.0759,  0.0298, -0.0528,  0.0399,  0.0455,
        -0.0067, -0.0046,  0.0089,  0.0127,  0.0204, -0.0774,  0.1990,  0.0562,
         0.0317,  0.0075,  0.1421,  0.1091,  0.0691,  0.0296, -0.2586],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.8840, -0.0292,  0.1515, -0.0035,  0.0059,  0.1631, -0.1172,  0.0879,
         0.1888, -0.0247, -0.2003,  0.0170, -0.1165,  0.1417, -0.2694, -0.1666,
         0.0516,  0.0499,  0.0974, -0.0226,  0.1506,  0.2882, -0.0377,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0839,  0.0375,  0.0207, -0.1417, -0.0360,  0.0172, -0.0279, -0.0364,
        -0.0376, -0.0018, -0.0255, -0.1014, -0.0034, -0.0417,  0.0366, -0.0488,
        -0.0591, -0.0820,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5714, -0.0265,  0.0808, -0.0460,  0.1064,  0.0320, -0.2892,  0.0723,
         0.0442,  0.0077,  0.0692,  0.0071, -0.2998,  0.0426,  0.1037,  0.1432,
        -0.1010, -0.2675,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1916, -0.0652, -0.1133, -0.1735,  0.1376,  0.1943,  0.1769, -0.1268,
        -0.0931,  0.1686,  0.0409, -0.3458,  0.0766, -0.0880, -0.2223,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4385e-01, -4.8237e-02,  1.0646e-01, -5.6677e-02,  2.4674e-02,
         2.0825e-02, -4.0468e-02,  1.2088e-02,  1.1738e-01, -1.6776e-02,
         1.5508e-02,  8.2625e-02,  2.9639e-03,  3.3397e-02, -6.8772e-03,
        -9.9405e-02, -4.4019e-02,  5.1645e-02, -1.0573e-04,  4.4006e-02,
        -6.9934e-03, -4.2555e-02,  3.8644e-02,  2.9720e-02,  3.7683e-02,
         1.2379e-02, -1.5440e-01,  4.3523e-02, -5.2696e-02,  1.5770e-01,
         1.1402e-01, -1.9662e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2241,  0.0222,  0.0123, -0.0231, -0.0510,  0.0128, -0.0341, -0.0178,
        -0.0460,  0.0474,  0.0716, -0.0282, -0.0390, -0.0081,  0.0264,  0.0134,
        -0.0257,  0.1051,  0.0318, -0.0015, -0.0415,  0.1406,  0.0856, -0.0016,
        -0.0802, -0.0477, -0.0306, -0.0734, -0.0588, -0.0422, -0.0292, -0.1087,
         0.1509,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1812,  0.0241, -0.0090, -0.0330, -0.0151, -0.0448,  0.0255, -0.0043,
         0.0623,  0.0966,  0.0198,  0.0150,  0.0886, -0.0113,  0.1851, -0.0230,
        -0.0051,  0.0545,  0.0362,  0.0252,  0.0286,  0.0077,  0.0727,  0.0286,
        -0.0680,  0.0125,  0.0445, -0.0101,  0.0425,  0.0629,  0.0504,  0.0375,
         0.0317, -0.0136, -0.0488,  0.0928, -0.0695, -0.0328, -0.0197, -0.0010,
        -0.0075, -0.0280, -0.0518,  0.0216, -0.0005,  0.0251,  0.0132, -0.1810,
         0.0141, -0.0515, -0.1837, -0.0174,  0.0204, -0.0106,  0.0682,  0.0163,
         0.1473, -0.0418], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0192,  0.0549,  0.0165, -0.0266,  0.0011,  0.0338,  0.0408, -0.0238,
        -0.0074,  0.0070, -0.0062, -0.0608,  0.1281,  0.0076,  0.0188, -0.0766,
         0.0195, -0.0135, -0.0157, -0.0122, -0.1027, -0.1112, -0.0255,  0.0574,
        -0.0565, -0.1810,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1009,  0.3332, -0.0214,  0.1516,  0.0306,  0.0761, -0.0075, -0.0624,
        -0.0716,  0.1795, -0.0117, -0.0193,  0.0239,  0.0313, -0.0525, -0.0482,
        -0.1221,  0.0043,  0.0309, -0.0180,  0.0345, -0.0955, -0.0303, -0.3995,
        -0.0679,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4405e-02, -2.8355e-01, -9.7433e-02,  1.4559e-02,  2.3278e-02,
         8.4461e-02, -1.8411e-01,  1.1465e-01,  2.4096e-02, -1.1845e-01,
        -1.0304e-01,  1.5432e-02, -8.4557e-02,  5.6664e-03,  9.2770e-03,
        -1.8708e-01, -2.2143e-02,  1.8937e-02,  1.2547e-02,  1.5571e-02,
         1.2444e-02, -2.8177e-03,  3.8603e-02,  7.0303e-03, -8.7463e-02,
        -4.1228e-03,  4.0233e-02,  2.4512e-02,  2.6577e-02, -6.0025e-03,
         3.8292e-02,  4.8576e-02, -6.2440e-02,  1.6239e-04, -6.7867e-02,
         3.1503e-02, -1.2144e-02, -8.3072e-03,  1.8280e-02,  5.2353e-02,
        -2.7239e-02,  6.9466e-02,  8.0901e-02, -7.6101e-02,  2.1391e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5604e-01, -1.6506e-02,  3.2067e-02,  1.3307e-01, -6.1971e-03,
         7.5831e-02, -6.5021e-02, -1.5172e-01,  4.5895e-02,  1.6470e-02,
        -3.0298e-02,  2.6088e-02, -1.6885e-01,  1.6452e-04, -4.0333e-03,
         9.7708e-02, -3.9958e-02, -1.7871e-02,  2.8606e-02, -4.2913e-02,
         6.8379e-03,  3.1008e-02,  2.5764e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2154, -0.4946,  0.0409, -0.1248,  0.0024, -0.0680, -0.0008,  0.0147,
         0.0397, -0.0969, -0.0697,  0.1162,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.3217, -0.0446, -0.0831,  0.0068, -0.0685, -0.0212, -0.0168, -0.1382,
         0.1999, -0.0039,  0.3713,  0.1769,  0.0834,  0.1067,  0.2236,  0.1563,
        -0.0929, -0.2121, -0.2343,  0.0640, -0.0696, -0.0378,  0.1620,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0563,  0.2123,  0.0815,  0.0184,  0.1338, -0.0415, -0.0261,  0.0121,
        -0.0263, -0.0366, -0.0243,  0.0124,  0.0232, -0.0246,  0.0011, -0.0062,
         0.0008, -0.1035, -0.0484, -0.0634,  0.0801, -0.0681, -0.0187,  0.0306,
         0.0360,  0.0800, -0.0729,  0.0739, -0.0240,  0.1215, -0.0631, -0.2329,
        -0.1769], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2745,  0.1487, -0.1918, -0.0277, -0.3342, -0.0912, -0.0767, -0.0953,
        -0.0488, -0.0064,  0.0255,  0.1048,  0.0531, -0.0381,  0.0223, -0.0776,
        -0.0619, -0.1945, -0.1073,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0505,  0.3230,  0.0853,  0.0620,  0.0953, -0.0083,  0.1003, -0.1241,
         0.1214, -0.1375, -0.0292, -0.0107,  0.0564, -0.2632,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0800, -0.6128, -0.0203,  0.0196,  0.0746, -0.0929, -0.0400,  0.0838,
        -0.1789,  0.0843, -0.0042, -0.1296, -0.0038, -0.2026, -0.0339, -0.0482,
        -0.1192, -0.0785,  0.0284,  0.1161, -0.1633,  0.0811,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0355, -0.1044,  0.0379,  0.0119,  0.0014, -0.0213,  0.0051,  0.0036,
        -0.0100,  0.0328, -0.1209,  0.1807,  0.0538, -0.0325,  0.0070, -0.0252,
         0.2111,  0.0113, -0.0646, -0.0116,  0.0355, -0.0420,  0.1424, -0.0698,
         0.0659,  0.0227, -0.0497, -0.1064,  0.1511,  0.0420,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5131, -0.4256,  0.0936, -0.1555,  0.0570, -0.0333, -0.0146,  0.1085,
         0.1383, -0.2015, -0.2181, -0.0110, -0.1792,  0.1078, -0.1250,  0.0542,
         0.3495,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1180, -0.0016,  0.0879,  0.0684, -0.0390,  0.1099,  0.0897,  0.1053,
         0.4052,  0.1782, -0.1011,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7573,  0.3773,  0.0977, -0.0885,  0.1020, -0.1044,  0.0235, -0.0623,
        -0.0334,  0.0651,  0.0137, -0.0645, -0.1495,  0.0123, -0.1571,  0.1062,
         0.0099,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2784,  0.1311, -0.0699,  0.0439,  0.2195,  0.0452,  0.0111, -0.0571,
         0.0339,  0.0109,  0.0094,  0.1979, -0.0162,  0.0227,  0.0122,  0.0469,
         0.0635, -0.0914,  0.1044,  0.0273,  0.0368,  0.1147,  0.0383, -0.0098,
         0.0370,  0.0391,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2769,  0.0465,  0.0605, -0.0867,  0.0998,  0.1473, -0.1906, -0.0909,
         0.1826, -0.0628,  0.0301, -0.0624,  0.0617, -0.0668, -0.0407, -0.0789,
        -0.0022, -0.0342,  0.0467, -0.1595,  0.0626,  0.0314,  0.0338, -0.0030,
         0.0195, -0.0115, -0.0621,  0.0427, -0.0009,  0.1920,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6765e-01, -4.1519e-01, -6.5195e-02, -9.8534e-02, -6.2625e-02,
        -7.1359e-02,  6.9918e-02, -1.2105e-01,  1.2772e-01,  5.4133e-03,
         1.6058e-02,  4.9943e-02,  1.6304e-02, -6.0561e-02, -1.2204e-01,
         1.1016e-01,  6.9081e-04, -2.3375e-01,  8.6623e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.9998, -0.2526, -0.2262, -0.0946, -0.1252, -0.0565, -0.0905, -0.0769,
         0.0168,  0.0438,  0.0391,  0.0695, -0.0403,  0.0537, -0.0717, -0.1134,
         0.0593,  0.0878, -0.0320, -0.1365,  0.0820, -0.0940,  0.0063, -0.1398,
         0.1420, -0.2103,  0.7487,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1126, -0.0053, -0.0451, -0.3481, -0.0250, -0.0963, -0.0902,  0.0576,
         0.2096,  0.0406, -0.1301, -0.1055, -0.0786, -0.0800,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0398,  0.1426,  0.1674, -0.0086, -0.0929,  0.0190, -0.0021,  0.0839,
        -0.0472, -0.0220,  0.1015, -0.0582, -0.0157, -0.0468,  0.0491,  0.0014,
        -0.0178, -0.0324, -0.1064, -0.0189, -0.0006,  0.0172, -0.0158, -0.0983,
        -0.1428,  0.0937,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6441,  0.2980, -0.2003, -0.1564, -0.2638,  0.0573,  0.3461, -0.3733,
         0.0968,  0.4437,  0.1995, -0.5237,  0.0612, -0.5161,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0929,  0.0927,  0.0232, -0.0771,  0.0784,  0.0480, -0.0551, -0.1370,
        -0.0159, -0.0706, -0.0320,  0.0015,  0.0529, -0.1651,  0.0879,  0.0256,
        -0.0707, -0.1462,  0.0575, -0.0117, -0.1445,  0.1754, -0.0913, -0.1636,
        -0.3243, -0.0679,  0.1274, -0.2584, -0.1338, -0.0414, -0.2097,  0.0644,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5312,  0.1202, -0.0592,  0.0128,  0.0132, -0.1198,  0.0091, -0.0283,
        -0.0038,  0.0670, -0.0089,  0.0033,  0.0440, -0.0158,  0.0281,  0.0008,
        -0.0457, -0.1497, -0.0197,  0.0038, -0.0096,  0.0268, -0.0236,  0.0185,
        -0.0763,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7803,  0.2615,  0.2688,  0.1562, -0.4588,  0.5760,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1423,  0.1842, -0.0571, -0.0154,  0.0876,  0.0088, -0.0371, -0.0631,
        -0.0388, -0.0491, -0.0529,  0.0081, -0.0150,  0.0187,  0.0203,  0.0101,
         0.1007, -0.0696, -0.0036,  0.0119,  0.0021, -0.0403, -0.0606,  0.0163,
         0.0028,  0.0394, -0.0139, -0.0027, -0.0485,  0.0317,  0.1207,  0.0495,
        -0.0501,  0.0022, -0.0069, -0.0235, -0.1026], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0722,  0.2324,  0.0749,  0.0337,  0.0108,  0.0352, -0.1119, -0.0521,
         0.0014, -0.0044,  0.0548,  0.0361,  0.0348,  0.1412,  0.0351, -0.0802,
        -0.1428, -0.0617,  0.0460, -0.0376,  0.0153, -0.0712,  0.0213, -0.0318,
         0.1447, -0.2324, -0.0601,  0.1767, -0.0255, -0.0149, -0.0152,  0.0421,
         0.0396, -0.2355, -0.1782,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6840,  0.3241, -0.0348, -0.1146, -0.3314, -0.0637,  0.0506, -0.0191,
        -0.0609,  0.2382,  0.1397,  0.1906, -0.1921, -0.4858, -0.1597,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0097, -0.1875, -0.0259, -0.0101,  0.0840, -0.0515, -0.0775,  0.2046,
         0.0994,  0.0093, -0.0562,  0.0257, -0.0402,  0.0061,  0.0483,  0.0081,
         0.0023, -0.0494, -0.1638, -0.0571, -0.0759, -0.0279, -0.0761,  0.0203,
        -0.0528, -0.0923, -0.0585, -0.0215, -0.0016, -0.0230, -0.0661,  0.1154,
        -0.0190,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2385,  0.2470,  0.0795,  0.1306,  0.0694, -0.1298, -0.0499,  0.0224,
        -0.0167,  0.0085,  0.0200,  0.0274,  0.0046, -0.1094, -0.0643,  0.0701,
         0.0372, -0.1629,  0.2267, -0.0517,  0.0457, -0.0031, -0.0413, -0.0024,
         0.0244, -0.0076, -0.0879, -0.2813,  0.0023,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.2736, -0.2819, -0.0948,  0.0081, -0.0178, -0.0597, -0.1323,  0.0479,
        -0.0768, -0.2339,  0.0597,  0.1132, -0.0112,  0.0719, -0.0382,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3037,  0.0782,  0.0434,  0.0291,  0.0265,  0.0879,  0.0716, -0.0358,
         0.1930,  0.1327,  0.0798, -0.0713,  0.0076, -0.0484,  0.0133, -0.0251,
        -0.0316,  0.0062, -0.0005, -0.0480, -0.0062, -0.0559, -0.0358,  0.0297,
        -0.0559,  0.0128,  0.0010, -0.0440, -0.0461, -0.0028,  0.0126,  0.0429,
        -0.0032, -0.0151,  0.0244,  0.0693,  0.0383, -0.0265, -0.0127, -0.0310,
         0.0187, -0.0298, -0.0785, -0.0449, -0.0117,  0.0300,  0.0527, -0.1531,
         0.0965], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0302, -0.0721, -0.0946,  0.0597,  0.0173, -0.1224, -0.1783,  0.0457,
         0.0803, -0.1338,  0.0210, -0.0761, -0.1244, -0.0302,  0.0248, -0.0872,
         0.1679,  0.0106, -0.0161,  0.0955,  0.1074,  0.0153,  0.0435, -0.0321,
        -0.0279, -0.0179,  0.0070, -0.0478, -0.0207,  0.0061, -0.0059,  0.0786,
         0.0156, -0.1925,  0.1803,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0249, -0.0146, -0.0542,  0.0393,  0.0064,  0.0254,  0.0554,  0.0356,
         0.0216,  0.0636,  0.1002,  0.0728,  0.0452,  0.0074, -0.1197,  0.0497,
        -0.0449, -0.2436,  0.0396,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1680,  0.3604,  0.1286,  0.0041, -0.0864, -0.0615,  0.1922, -0.0017,
         0.1351, -0.0338, -0.0432,  0.0771,  0.0924, -0.2862, -0.0285,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0877, -0.0544,  0.0864,  0.0909,  0.0213,  0.0129,  0.0191,  0.0753,
        -0.0211,  0.0186, -0.0172,  0.0660, -0.0476, -0.0124,  0.0087, -0.0862,
        -0.0147, -0.0060,  0.0298, -0.0350,  0.0366,  0.0254,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0560,  0.1296,  0.1496,  0.0486, -0.0157,  0.0383,  0.1353, -0.0442,
         0.0520,  0.0380, -0.0143, -0.0262, -0.0127,  0.0272,  0.0090, -0.0014,
        -0.0127, -0.0311, -0.0745,  0.0100,  0.0807,  0.0534, -0.0906,  0.0285,
        -0.0148,  0.0371, -0.0638,  0.0298,  0.0762, -0.0482,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3415, -0.1970, -0.0638, -0.1753, -0.4059,  0.0759,  0.0411,  0.2012,
        -0.0744, -0.2168,  0.2053,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0839,  0.1213, -0.1243,  0.0294, -0.0604, -0.2704, -0.0527, -0.0495,
        -0.0035,  0.0416, -0.1423, -0.0725, -0.0012, -0.0332, -0.0165,  0.0737,
         0.0163, -0.0043, -0.0343, -0.0773, -0.0771, -0.3784,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0261,  0.2814, -0.2361,  0.0071,  0.0211, -0.2236, -0.0460,  0.0991,
        -0.0675, -0.0081, -0.0009,  0.2219, -0.0023, -0.0189,  0.0389,  0.1238,
         0.0255,  0.0284, -0.0265,  0.0495,  0.0173, -0.0304,  0.1555,  0.0956,
        -0.0392, -0.0353, -0.0153, -0.0349,  0.0465,  0.1111,  0.0192,  0.0572,
        -0.0286,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4665, -0.3088, -0.1309, -0.0753, -0.1463, -0.0065,  0.0984,  0.0590,
        -0.0842, -0.0503, -0.0358, -0.0599,  0.2224, -0.1049, -0.1419, -0.2940,
         0.0126,  0.4237,  0.0665,  0.4278,  0.2904, -0.3347, -0.0409,  0.0188,
        -0.0340, -0.1224,  0.1580,  0.1125,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0268, -0.5562, -0.0990, -0.0107,  0.0723, -0.0292, -0.0387, -0.0934,
        -0.1696,  0.0104, -0.1604,  0.0489, -0.0215,  0.0756, -0.0297,  0.1280,
        -0.0035,  0.0221,  0.0232, -0.0330, -0.0132, -0.0176,  0.1344,  0.0485,
        -0.0218,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.0051,  0.1721, -0.0136,  0.0202,  0.1426,  0.0455, -0.0062,  0.0123,
         0.0402, -0.0619, -0.0070, -0.0095, -0.0309, -0.0373,  0.0098, -0.0142,
         0.0197,  0.0324,  0.0245, -0.0047, -0.0181,  0.0058,  0.0371,  0.0127,
         0.0277,  0.0106,  0.0193,  0.0306,  0.0543, -0.1264, -0.0090, -0.0243,
        -0.0124,  0.0304,  0.0327, -0.0304, -0.0010, -0.0128,  0.0569, -0.0065,
        -0.0772,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6023, -0.2468, -0.0575,  0.0878, -0.0036,  0.1278, -0.1125, -0.0454,
        -0.0685, -0.0051,  0.0470, -0.0104,  0.1176,  0.0558,  0.0367,  0.0075,
        -0.0828, -0.0953,  0.0444, -0.0590,  0.0308, -0.0091,  0.0342,  0.2447,
         0.1203,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1185,  0.1271, -0.0541,  0.0870,  0.0058,  0.1152,  0.1119,  0.0285,
        -0.0918, -0.0625, -0.1942, -0.0396, -0.0060,  0.0188,  0.0652,  0.0164,
         0.0013, -0.0495, -0.1547, -0.0613, -0.0464,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0022, -0.2852, -0.0381,  0.0692, -0.0305, -0.0700, -0.0200, -0.0771,
         0.0257, -0.2498, -0.0442,  0.0293, -0.0016, -0.0914,  0.0038, -0.0329,
         0.0529, -0.0514,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0635, -0.0306,  0.0507,  0.0189,  0.0120, -0.0065, -0.0367, -0.0419,
         0.0152, -0.0031,  0.0448, -0.0261, -0.0514, -0.0934, -0.0061,  0.0389,
        -0.0562, -0.0761,  0.0372,  0.1028,  0.0416, -0.0131,  0.0092, -0.0081,
         0.0845,  0.0047,  0.0328,  0.0312,  0.0479,  0.0018, -0.0082,  0.0234,
        -0.0044, -0.0503,  0.0541,  0.0369, -0.1567,  0.0174,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2200,  0.1796,  0.1816, -0.0594,  0.1305,  0.0007,  0.1316, -0.1555,
         0.0245, -0.0354, -0.0583,  0.0612,  0.0851,  0.0522,  0.0878,  0.1220,
         0.0108, -0.2256, -0.1753, -0.2570,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3377e-01,  2.3206e-02, -1.2088e-01, -2.6564e-02, -5.9137e-02,
         2.2042e-01,  1.7119e-02, -1.5662e-01,  1.5121e-01,  7.1844e-02,
        -8.5766e-02,  1.2919e-04,  1.9407e-01, -2.0593e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0484,  0.0748,  0.0592,  0.0573, -0.0717, -0.0646, -0.0638, -0.0166,
        -0.0709,  0.0671, -0.0169,  0.0020,  0.0113, -0.0429,  0.0069, -0.0125,
         0.1108, -0.0452, -0.0977, -0.0490,  0.0798, -0.0939, -0.0454, -0.0638,
        -0.0220, -0.0258,  0.0047, -0.0368, -0.0034,  0.0845,  0.0464, -0.1703,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1967, -0.0260, -0.0817, -0.1395,  0.1118,  0.0684, -0.0561, -0.0020,
        -0.0007,  0.1766, -0.0374, -0.0620, -0.0394, -0.1244, -0.0816, -0.0093,
         0.0338,  0.0125, -0.0866, -0.1297,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9042e-01, -3.7804e-01, -1.4446e-01,  7.1241e-02, -1.6046e-01,
        -5.9261e-02,  2.9983e-02,  1.6961e-02,  3.7739e-02,  1.5097e-02,
        -3.3759e-02, -1.1202e-02,  7.8023e-03,  1.1733e-02,  3.2911e-02,
         3.0542e-02,  5.5808e-03,  4.0339e-02,  5.8159e-02,  3.0899e-02,
         5.6440e-02,  2.4360e-03, -4.3146e-03, -4.3904e-02,  1.6924e-02,
         5.7117e-02, -1.0045e-02, -5.1709e-03, -6.1597e-02,  6.5265e-02,
        -2.0659e-01,  5.8285e-03, -3.5791e-02, -8.2121e-02,  2.3539e-02,
        -3.3434e-02, -5.6082e-02, -2.2566e-05,  2.6283e-02, -5.6457e-02,
         5.1750e-02, -4.1061e-02,  1.8677e-02,  8.8409e-02, -1.0113e-02,
        -5.1632e-02, -1.9620e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1786, -0.0275,  0.0954,  0.0784, -0.0469,  0.0430,  0.0096, -0.0418,
         0.0698, -0.0392,  0.0980,  0.0072,  0.0294,  0.1456, -0.0495,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1610e-01, -1.5680e-01,  8.6240e-03, -1.1415e-01, -2.4850e-02,
        -3.6664e-02, -1.4476e-02, -2.4450e-02,  6.2185e-02,  1.7844e-02,
         1.7025e-02,  9.0998e-03, -6.9966e-02, -1.8122e-02,  3.1606e-02,
        -2.3399e-02, -2.6956e-02,  3.1752e-02,  9.8044e-02,  4.5130e-02,
         7.6578e-02,  1.4296e-01,  9.0526e-02, -3.3291e-02, -2.0169e-02,
        -1.8757e-02, -2.2530e-02,  1.0827e-01, -8.8921e-03, -2.1793e-02,
        -9.0102e-02, -3.9032e-02, -1.7634e-01, -3.1045e-02,  7.1819e-02,
         8.5617e-04,  2.7960e-03,  2.9352e-02, -1.8627e-02,  4.7640e-02,
        -8.2174e-05,  2.7395e-02, -8.2137e-02, -1.9328e-02, -3.3524e-02,
        -9.2340e-02,  1.9132e-01, -3.2355e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.2667,  0.3171,  0.0747,  0.0613, -0.0271,  0.0635, -0.0405, -0.2316,
        -0.3319, -0.0478, -0.0180,  0.0163, -0.0253,  0.0052,  0.0146, -0.0070,
         0.0033,  0.1282,  0.0356,  0.1009,  0.0054, -0.0269,  0.0219,  0.0716,
        -0.0024,  0.1624,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2965, -0.5715, -0.0803, -0.1740, -0.1586,  0.0189,  0.2560,  0.1168,
         0.1505,  0.1043,  0.0055, -0.0850,  0.0420,  0.0339,  0.0291, -0.0848,
        -0.0443,  0.0478,  0.0801,  0.0481,  0.0537,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5201, -0.1766, -0.0393,  0.2097,  0.1160,  0.1402,  0.1115, -0.1405,
         0.0062, -0.0342, -0.0106,  0.0344,  0.0749,  0.0404,  0.0252, -0.0172,
        -0.1083,  0.0861, -0.0826, -0.1487, -0.0211, -0.0201,  0.0244,  0.0534,
        -0.0021,  0.0243, -0.0039, -0.0554, -0.0561, -0.0278,  0.0428,  0.0827,
         0.0220,  0.0218, -0.4827, -0.0190,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4691,  0.8273, -0.0523,  0.3697, -0.0125, -0.3438,  1.1352, -0.2729,
        -0.0869,  0.0892,  0.1457, -0.0049, -0.0252, -0.0096, -0.0124,  0.2564,
        -0.1079,  0.0508,  0.4482, -0.1718,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5935, -1.1134, -0.0521, -0.0355,  0.0357,  0.1285,  0.0427,  0.2001,
         0.1935,  0.0195, -0.1115, -0.0872,  0.0891,  0.0715,  0.0307,  0.1563,
        -0.1019,  0.0192,  0.0479,  0.1978, -0.2300,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7472, -1.7713, -0.8557,  0.6461,  0.1108, -0.0019, -0.4126,  0.2395,
        -0.1073, -0.3029,  0.1617,  0.3455, -0.0767,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6021e-01, -4.2465e-01, -1.9647e-01, -1.0522e-01,  1.6764e-01,
         7.3794e-02,  1.2642e-01,  2.2489e-02, -1.3278e-02, -2.1507e-02,
         2.2272e-02,  5.2613e-02,  1.0719e-01,  4.4173e-02,  5.7913e-02,
        -3.5436e-02,  4.8581e-03,  1.8459e-01,  6.7188e-02, -2.0523e-02,
         1.5359e-01, -9.5970e-02, -1.0753e-01, -2.8295e-01,  1.4176e-01,
        -1.5736e-05,  1.8229e-01, -8.8238e-02,  3.9007e-02, -3.4924e-02,
         7.5517e-02, -1.0320e-01, -6.6419e-02, -7.6302e-02,  9.9138e-02,
        -8.8201e-02, -2.0831e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7360, -0.5527, -0.3611, -0.2159,  0.1306, -0.0845, -0.1110, -0.0017,
        -0.1563,  0.0514,  0.0933, -0.1178, -0.0235, -0.0719, -0.0342, -0.0462,
        -0.0225, -0.0121, -0.0222,  0.0134,  0.0411, -0.0402,  0.1997, -0.3634,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5275e-01,  1.7048e-02, -1.0839e-02, -3.7098e-02, -1.8199e-02,
        -5.0457e-02, -7.2855e-02,  2.2174e-02,  1.2646e-02, -1.5131e-02,
         2.0547e-02,  5.9382e-02,  4.9688e-02, -1.0610e-02, -5.2084e-02,
        -3.2019e-02,  3.9913e-02,  2.0232e-02,  1.8280e-02,  9.8535e-02,
        -2.5186e-01, -4.5473e-01, -1.5785e-02, -7.3934e-02,  1.5724e-01,
         3.6221e-04, -8.1673e-02,  6.0419e-02,  9.8738e-03,  5.6669e-02,
        -1.2614e-01,  1.9131e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0040,  0.3138, -0.0038, -0.1721,  0.0500, -0.0493,  0.0148,  0.1713,
        -0.0981, -0.0245,  0.0732,  0.0241,  0.0259,  0.0328,  0.0043, -0.0311,
         0.0118,  0.0726,  0.0295,  0.0272,  0.0362, -0.0592, -0.2102,  0.0130,
         0.1169,  0.2981, -0.0662,  0.0724,  0.1814,  0.0985,  0.0543,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6532e-01, -1.7190e-01,  1.2445e-01,  1.8286e-01,  8.9336e-03,
         4.3558e-02, -1.9509e-02, -1.5260e-01, -2.1862e-01,  1.7750e-02,
         6.6067e-02, -9.2826e-02, -1.1846e-01, -1.6751e-01, -3.8720e-02,
        -9.6983e-02, -2.0581e-02, -2.0131e-04, -1.0413e-01,  1.1601e-01,
        -6.5119e-02,  1.4296e-01,  1.6242e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0794,  0.7238,  0.1567, -0.0085, -0.0258, -0.1234,  0.0565,  0.0379,
        -0.0147,  0.0155, -0.0875,  0.1692, -0.0025, -0.1834,  0.0509, -0.0074,
        -0.0583, -0.0101, -0.0382,  0.0166,  0.1224,  0.0013, -0.0100, -0.0264,
         0.0194, -0.0236,  0.0409, -0.0654,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 5.1988e-02,  7.9713e-01, -5.9841e-02,  4.3444e-02, -5.9244e-02,
        -1.5270e-02,  3.0940e-02, -5.7925e-02, -8.0881e-02, -2.7307e-02,
         2.8943e-02, -1.1835e-02, -3.2435e-04, -5.0325e-02, -1.3377e-02,
        -2.6574e-02,  4.2918e-02, -2.8960e-02,  8.6018e-02, -3.8954e-02,
         1.2590e-03, -1.4081e-02, -1.7379e-02,  3.2345e-02, -7.3370e-02,
         7.0084e-02,  1.5570e-01,  6.5061e-02,  9.9616e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5637,  1.8658,  0.3770,  0.0350, -0.2244, -0.3376, -0.2216,  0.2076,
        -0.2607, -0.4230, -0.0416, -0.1288,  0.0932,  0.0264,  0.0764, -0.0456,
        -0.0153,  0.1644,  0.1024,  0.0280, -0.1664, -0.2230, -0.2979, -0.1220,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3462,  0.5580,  0.1211, -0.1280,  0.0176, -0.0117, -0.0896, -0.0316,
        -0.0268,  0.0693,  0.0942,  0.0353,  0.1472,  0.0825,  0.0463,  0.0103,
         0.0390, -0.0006, -0.0996,  0.0311,  0.0239, -0.0688,  0.0150, -0.0438,
         0.0104,  0.0382,  0.0376,  0.0230,  0.0579, -0.0382,  0.2228,  0.1937,
         0.2929,  0.0135], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0991, -2.2010, -0.4539,  0.0527,  0.1361, -0.0745, -0.3279,  0.1158,
         0.0884,  0.2158,  0.0113, -0.0339,  0.0894,  0.0396, -0.0390,  0.0137,
        -0.0381, -0.0821,  0.0346, -0.0172, -0.0053, -0.0321,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2085e-01, -1.4957e+00,  2.4066e-01, -1.5922e-01,  1.6847e-01,
        -1.2524e-03, -1.0823e-01, -3.7049e-02,  1.6494e-02,  1.9023e-01,
        -1.7006e-02, -4.8734e-02,  4.7473e-02, -9.8399e-03,  1.9376e-02,
        -1.0465e-01,  4.8460e-01, -5.0637e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2879, -1.2832, -0.3822,  0.2375,  0.4587, -0.2639,  0.0534, -0.0585,
        -0.0868, -0.2403,  0.0200,  0.0389, -0.0530, -0.0028,  0.0223,  0.1934,
         0.0122,  0.0428, -0.1596, -0.0630, -0.0897,  0.2913,  0.2590,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3300, -1.2196, -0.0073,  0.0118,  0.2359,  0.0878,  0.1627, -0.0263,
         0.0655,  0.0859, -0.2188,  0.0021,  0.0105, -0.1755,  0.3511,  0.0510,
        -0.1909,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5491, -0.2124, -0.1535, -0.0642,  0.0482,  0.0078, -0.1872, -0.0246,
         0.0893, -0.2032, -0.0625,  0.0654,  0.0011,  0.0213, -0.0209, -0.0089,
        -0.1994, -0.1396,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8259e-01, -4.6635e-01, -5.1123e-01,  9.6015e-02,  3.2576e-02,
        -5.0362e-02,  5.3022e-02,  2.1278e-02,  3.2520e-05, -1.9010e-02,
         9.0925e-04,  4.4690e-03,  1.5098e-01,  9.3437e-02,  6.9845e-02,
        -5.7542e-02,  1.0924e-01, -3.2858e-02, -1.1101e-02,  1.1196e-01,
        -1.3414e-01, -1.1941e-03, -1.6437e-01, -1.8739e-01,  1.0801e-01,
        -4.6043e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5654, -0.2917,  0.6019, -0.0081, -0.0941, -0.1158,  0.0085,  0.0272,
        -0.0503, -0.0219,  0.0339,  0.0024,  0.0090,  0.0751,  0.0457, -0.0456,
         0.0727,  0.0029,  0.0336,  0.1324, -0.0026, -0.0650,  0.0055, -0.0247,
         0.0649, -0.0726, -0.1151, -0.0275, -0.2755,  0.0484, -0.0156,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0645, -0.6106, -0.1024,  0.0804,  0.0759, -0.1874,  0.1004,  0.0637,
         0.0396,  0.1381, -0.1011,  0.0237, -0.0842, -0.0401,  0.1093,  0.0093,
        -0.1335,  0.0106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3628, -1.7670,  0.0020, -0.0918, -0.4254, -0.0343,  0.0779, -0.4070,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.2471e-01,  1.5469e+00,  2.8362e-01, -1.6158e-01,  3.2508e-02,
        -5.7914e-03, -5.1580e-02,  6.5548e-02,  4.7361e-02,  1.1127e-01,
         5.2054e-02,  1.4937e-01,  3.1139e-02,  6.3332e-03, -3.6161e-03,
         3.1389e-02, -5.2052e-02, -8.9328e-02, -1.3153e-01,  2.3162e-01,
        -3.1851e-01, -1.3604e-01, -5.9893e-02,  4.9320e-02,  4.3712e-02,
         5.2641e-02, -6.7034e-02,  3.2761e-02, -1.0593e-01,  4.3008e-02,
         1.5663e-01,  2.6182e-02,  5.3661e-02,  4.5052e-02, -8.8136e-04,
         1.6797e-01, -3.9735e-02, -2.1073e-01, -2.1762e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3564,  2.3205, -0.6783,  0.1253,  0.2135, -0.0968,  0.1410, -0.1666,
        -0.2239,  0.4779,  0.5697, -0.0576,  0.3304,  0.0957, -0.0521, -0.0069,
        -0.1926, -0.1253, -0.1249, -0.2823,  0.1046,  0.1013,  0.1615, -0.0634,
         0.0762, -0.0593, -0.5304,  0.0671,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0749,  0.7997,  0.4084,  0.0162, -0.1171,  0.0765,  0.1179, -0.1192,
        -0.0190, -0.0262, -0.1757,  0.0096, -0.1124,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9905, -1.9298, -0.0746, -0.2241,  0.0783, -0.0863, -0.2420,  0.1275,
        -0.0210, -0.1412, -0.0026, -0.2931, -0.1809,  0.5978, -0.1574,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6310e-01, -8.6306e-01, -2.5270e-01, -3.1896e-01, -2.7332e-01,
        -9.6518e-02,  2.4012e-02, -9.3706e-02, -1.4495e-01, -1.4610e-01,
         5.3387e-02,  7.1300e-02, -4.7164e-02, -2.3279e-02, -2.7423e-02,
         7.6504e-03,  2.5222e-03, -3.9464e-02,  3.2047e-02,  1.5256e-02,
        -2.8483e-02, -5.1200e-02, -6.0262e-02,  5.2006e-03, -3.3003e-01,
        -1.5023e-01, -2.1782e-02, -1.4968e-02, -2.5080e-02,  1.4755e-03,
         7.2505e-02, -7.3053e-02, -4.4396e-05, -3.7049e-02, -2.4490e-01,
        -9.9786e-02,  8.6547e-02,  6.4670e-02, -6.8058e-02, -1.1852e-04,
         1.0746e-02, -1.9501e-02, -5.0453e-02,  3.4172e-02, -5.7562e-02,
         5.5057e-02, -1.0059e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9637e-01, -2.4843e+00,  4.0288e-02,  3.0209e-01, -1.3650e-02,
         1.0461e-01,  1.5890e-01,  3.9732e-03,  2.7239e-01, -4.2636e-02,
         4.9069e-02, -1.8321e-01,  1.3041e-01, -3.2094e-02,  4.0480e-02,
         3.7186e-02,  8.2350e-02,  1.8559e-01,  4.8626e-02, -1.0747e-03,
        -3.0664e-03, -2.7525e-03, -5.6239e-02, -8.5445e-02,  7.5059e-03,
         4.2767e-02, -2.9182e-02, -1.7788e-02,  6.7116e-02,  4.0721e-02,
         5.0449e-02,  5.4881e-02, -3.2987e-02,  1.0181e-02,  3.5956e-02,
         3.6509e-02,  7.5443e-02,  5.3310e-02,  1.9616e-02,  2.2097e-02,
         1.2368e-01, -2.1508e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4121, -4.3586, -1.1650, -0.0792, -0.1267, -0.1592,  0.1815, -0.0866,
         0.2176, -0.2670, -0.2554, -0.1979,  0.0483, -0.0835,  0.1495, -0.1213,
         0.0140, -0.1911, -0.1452, -0.0408, -0.0309,  0.0606, -0.0629, -0.1781,
        -0.5759,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4260, -2.0255, -1.0801,  1.4596,  0.3832,  0.1556, -0.7673,  0.2143,
        -0.7055,  0.7673,  0.1023, -0.3627, -0.3951,  0.0865, -0.5847,  1.2497,
         0.0438, -0.4127, -0.3284,  3.4285,  0.2803, -0.3818,  1.2109,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4276,  2.4584,  0.2134,  0.0970,  0.1547,  0.4049,  0.2702, -0.1780,
        -0.0643,  0.4696,  0.1104, -0.0973,  0.4351,  0.1055, -0.0597,  0.0058,
        -0.0581,  0.3333, -0.1782, -0.1294, -0.0056, -0.4367, -0.2782,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3456, -2.3307, -0.3117,  0.0063, -0.0319, -0.0653, -0.2349, -0.1643,
         0.1160, -0.2308,  0.3485, -0.1144, -0.2696,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8742,  2.3591, -0.3193, -0.3992,  0.0474, -0.1736, -0.1032, -0.1944,
        -0.2036, -0.2615, -0.3409, -0.0826, -0.1144, -0.1567,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6009,  5.4694,  0.9426, -0.0596,  0.7369,  0.1154, -0.0060,  0.1150,
        -0.0396, -0.3589,  0.0682,  0.1196, -0.0477, -0.7818,  0.5001, -0.4594,
         0.2020, -1.1284,  0.8902,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0522,  0.7581, -0.0915, -0.0083, -0.1413, -0.1003, -0.0418,  0.0626,
         0.1311,  0.0220,  0.0374,  0.1246, -0.0409, -0.0683,  0.1003, -0.1214,
         0.0304, -0.0361, -0.1126, -0.0164,  0.0511,  0.0406, -0.1211, -0.1015,
         0.4023,  0.5677,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5719e-01, -3.3859e+00, -3.4681e-02,  4.8146e-01,  2.1093e-01,
        -1.7183e-01,  3.0445e-01,  2.9951e-02,  1.0646e-01,  3.6819e-02,
         4.6824e-04, -1.1488e-03,  3.8542e-01,  2.0203e-01, -5.2547e-02,
         5.5322e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9623, -3.2704,  0.2637, -0.7629,  0.6062, -0.4124, -0.3374, -0.2280,
         0.1065,  0.0275, -0.1592,  0.3625, -0.1720,  0.0643, -0.1860, -0.1856,
        -0.4574,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2931, -5.1187, -0.1006,  0.7430, -0.1849,  0.0568,  0.1405, -0.6446,
         0.4318,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3477, -0.5050, -0.3215,  0.0058, -0.0032,  0.0527,  0.1517,  0.0092,
         0.0809, -0.0129,  0.1582,  0.0119,  0.0010, -0.0301, -0.0304,  0.0689,
         0.0230, -0.0934, -0.0178, -0.0166,  0.0378,  0.0108,  0.0534, -0.0769,
        -0.0334, -0.0058,  0.0568, -0.0451, -0.0177,  0.0674,  0.0079,  0.0215,
        -0.0167, -0.0283,  0.0255,  0.0533, -0.0132, -0.0604, -0.0636,  0.0018,
        -0.0067,  0.0706,  0.0688, -0.1442,  0.2233, -0.1416], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2971,  0.6634, -0.6869, -0.0072, -0.1139,  0.0576, -0.2711, -0.0415,
        -0.0490, -0.2208, -0.0366, -0.0161, -0.0907, -0.0073,  0.0052, -0.0030,
        -0.0530,  0.0186, -0.0162, -0.0251, -0.0049,  0.0188,  0.0321, -0.0619,
        -0.0442,  0.0126, -0.0041, -0.0692, -0.0386, -0.3484,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.1998, -11.7377,  -0.2683,   1.3998,  -1.3385,  -0.6180,  -0.6975,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5570, -3.7847,  0.2790, -0.3396,  0.1135,  0.1902, -0.4516,  0.0796,
        -0.4093,  0.5565, -0.4307,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0657, -1.2386,  0.2160,  0.2505,  0.1071,  0.1500, -0.0233, -0.0288,
        -0.2575,  0.0698,  0.0033, -0.0420, -0.3915,  0.0381,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2040,  1.8999,  0.0785,  0.0781,  0.0095, -0.0562,  0.0659,  0.1589,
        -0.0735, -0.0616, -0.0638, -0.0062, -0.0592, -0.0261,  0.0653, -0.2705,
        -0.0417, -0.0801,  0.0302,  0.0321,  0.1112, -0.0805,  0.0043, -0.0178,
        -0.1691,  0.0829, -0.0426,  0.1564, -0.5245, -0.0178,  0.2311, -0.0402,
        -0.3516,  0.3309,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0174,  1.9153,  0.3873,  0.2959, -0.2863,  0.6667, -0.4300, -0.1746,
         0.1776,  0.6482, -0.0376,  0.0310,  0.2827, -0.0726,  0.1183, -0.0309,
         0.2990,  0.0133,  0.0452, -0.1093, -0.5212, -0.1401, -0.0627, -0.0968,
        -0.1111, -0.0734, -1.2625, -0.1015,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8175,  1.1011, -0.0727, -0.0542, -0.1801,  0.0580,  0.0994, -0.0540,
         0.1055,  0.0149,  0.0558, -0.0358,  0.1633, -0.2539,  0.1533,  0.2116,
         0.1096, -0.0076, -0.1587,  0.0715, -0.0554, -0.0847,  0.2581, -0.0377,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.6894, -4.2276,  0.7417, -0.9615, -0.2951, -0.2352, -0.3272,  0.0611,
        -0.0691,  0.0366, -0.0688, -0.1404,  0.1302,  0.2674,  0.2682,  0.1996,
         0.1738,  0.5230,  0.1971, -0.2758, -0.2670,  0.0951,  0.3715, -0.0060,
         0.0798, -0.1826,  0.0946,  0.1072, -0.1008, -0.0558, -0.6376,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3341, -1.9860,  0.7792,  0.1518,  0.4055, -0.1967, -0.2176,  0.2410,
        -0.0602,  0.3517,  0.1803,  0.4473,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1283, -2.4412, -0.3046,  0.3313,  0.3037,  0.2047, -0.6979, -0.0262,
        -0.0559,  0.0314, -0.0766, -0.0398,  0.2664, -0.3071,  0.3849,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6756e+00, -5.1503e+00, -1.4433e+00, -2.7383e-03, -1.4380e+00,
        -9.3528e-02, -3.4958e-01,  8.2690e-03,  7.5975e-03, -1.1850e-01,
         7.6469e-02,  1.2092e-01,  6.1780e-02,  1.3792e-01,  1.1920e-01,
         2.0414e-01, -1.3450e-01,  1.7831e-01,  6.2829e-01,  1.8377e-01,
        -4.5692e-02,  9.7321e-02, -3.7625e-02,  7.4429e-02,  8.9672e-02,
        -2.6315e-02,  5.1017e-02,  5.6709e-02,  1.2709e-02,  6.0363e-02,
        -2.4231e-01,  1.3376e-01, -1.5852e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4932, -1.4813,  0.0867,  0.4905, -0.0021,  0.4004,  0.1616, -0.0606,
         0.2488,  0.1613,  0.3652,  0.0081, -0.0453,  0.5347,  0.4700,  0.2373,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4037,  1.2603,  1.1182, -0.3031, -0.4559,  0.7774, -0.9106, -0.6273,
         1.1651,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9960, -1.1713,  1.8128, -0.1086,  0.0191,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2169, -2.3035,  0.0321, -0.1019,  0.0972, -0.0861,  0.0113, -0.0864,
        -0.1997,  0.1307, -0.0823,  0.0664,  0.2154,  0.3511, -0.0737,  0.0077,
         0.0208, -0.0700, -0.1053, -0.0785,  0.1098, -0.0749, -0.1432,  0.1309,
        -0.0297, -0.1581, -0.1101,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0379, -10.0403,  -1.1014,  -2.6813,   1.7530,  -0.4308,   0.9245,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1491, -1.7712,  0.0809,  0.1001,  0.0650,  0.0428,  0.1742, -0.0285,
         0.1179, -0.0125,  0.0383, -0.0108,  0.0068,  0.0287,  0.0637,  0.0805,
        -0.0279,  0.0278,  0.0128, -0.1091,  0.1070,  0.1047,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1751e+00,  1.5034e+00,  1.1689e-03, -4.2536e-02,  1.6034e-01,
        -1.6934e-01, -1.9132e-01,  3.0006e-01, -1.8477e-01, -1.9481e-02,
        -2.3035e-03, -1.1581e-01,  1.3471e-01,  8.3448e-02, -1.6698e-01,
        -4.3116e-02,  9.0527e-02, -3.4768e-01,  1.6436e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3272e-01,  5.2309e+00, -1.2750e-01, -3.4453e-01, -3.2874e-01,
        -9.8982e-02,  1.8318e-01, -2.4970e-02,  1.1443e-01,  9.0518e-02,
        -2.2609e-01,  3.2610e-02,  2.4193e-02,  2.1491e-01, -5.1835e-02,
         1.2607e-01, -2.6108e-02, -5.7937e-02,  3.2293e-02, -2.2989e-01,
         9.0432e-02,  2.7136e-01,  1.3139e-02, -3.4204e-02,  2.6995e-02,
         1.5390e-01,  3.9054e-04,  1.7048e-01,  2.7055e-01, -4.9968e-02,
        -5.4346e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.7062, -2.9218,  0.5548,  0.0085,  0.2059, -0.0710, -0.0650,  0.0951,
        -0.2058,  0.0791,  0.0241, -0.0368,  0.0406,  0.0165, -0.0584,  0.0437,
         0.1536,  0.0607,  0.2748,  0.0233,  0.0299,  0.0968,  0.1369,  0.1272,
        -0.1323, -0.0436, -0.0180, -0.0158,  0.0871,  0.2555,  0.0278,  0.3110,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7664,  2.2721,  0.6427, -0.2043,  0.1061, -0.1002, -0.1887, -0.0454,
        -0.1580,  0.0539,  0.0597, -0.0827, -0.0072,  0.2272, -0.4187,  0.2001,
         0.0417,  0.1870, -0.0708,  0.0777, -0.2959, -0.0082,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1063, -2.8586,  0.3231, -0.0713,  0.1274,  0.0073,  0.4333, -0.3604,
        -0.4255,  0.0543, -0.0549, -0.0790,  0.0227,  0.0530,  0.0638, -0.0484,
         0.0324, -0.0054, -0.2250,  0.2756, -0.1083,  0.1152,  0.0438,  0.0430,
        -0.1709, -0.0939, -0.0183,  0.0282, -0.0811,  0.1765,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3550,  2.3347, -0.7677,  0.0349,  0.0236,  0.1483, -0.1431, -0.0600,
        -0.0933, -0.0380,  0.2676, -0.2482, -0.0891, -0.0311, -0.1021,  0.0462,
         0.5267, -0.2269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2608, -4.4800,  0.1584, -0.7601, -0.6688, -0.5807, -0.1821,  0.1346,
        -0.0472,  0.2365,  0.0599, -0.0510,  0.0395, -0.0690, -0.1768,  0.4879,
         0.2760,  0.0826, -0.1215,  0.1454,  0.1021,  0.0258,  0.4988,  0.0190,
         0.0410, -0.0184, -0.2621,  0.1187, -0.0822,  0.1478, -0.0218, -0.0501,
        -0.0328, -0.2267, -0.0491,  1.8700, -0.4850,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7157e-02, -3.8128e+00, -3.4404e-01, -8.3995e-02,  1.5585e-01,
        -1.0313e-02, -4.0690e-02, -1.2630e-01,  1.1299e-01, -3.1108e-02,
         2.2737e-02, -1.3447e-01, -1.4517e-01, -1.3622e-01,  1.4292e-01,
         8.8994e-02,  1.1628e-01, -8.6593e-02, -8.9645e-03, -2.8828e-03,
        -2.1517e-01,  3.6563e-02,  5.5318e-01, -1.0293e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0308e-01, -4.5787e+00, -2.8324e-01,  1.8925e-01, -8.3077e-01,
         9.5346e-01, -1.9271e-01, -7.4152e-02, -7.5236e-01,  3.0234e-01,
         9.2356e-02,  1.5922e-01, -4.6140e-02,  4.4038e-02, -1.1242e-04,
         1.1467e-01,  6.8996e-02,  1.5122e-01, -6.6899e-02,  4.0240e-02,
         3.0762e-02,  1.3370e-01,  9.1517e-02,  2.0612e-01, -3.2809e-01,
        -9.1560e-02,  8.7101e-02,  8.5606e-03,  7.0861e-02, -2.0029e-01,
        -3.9852e-01,  1.8662e-01, -4.0931e-02,  8.3302e-02, -2.6912e-02,
         3.5626e-01,  8.1111e-02,  7.2880e-02,  4.6277e-02,  3.2381e-02,
         1.1285e-01,  9.1733e-02,  1.3099e-01, -6.6996e-04,  1.4600e-01,
        -6.2948e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5096, -1.1727,  0.0160, -0.2248,  0.1060, -0.0792,  0.1223,  0.0515,
         0.0992,  0.0640, -0.0850, -0.0070, -0.1373,  0.0743, -0.1292, -0.0356,
         0.2726, -0.0447, -0.0732, -0.0248, -0.1084, -0.4373, -0.1655,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1742, -5.0098, -0.5696,  0.2432, -0.0499,  0.5941, -0.1539, -0.0623,
         0.2793,  0.3676, -0.0928,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6445,  1.2511, -0.3635,  0.3557, -0.2254,  0.0203, -0.4179, -0.0543,
         0.0095, -0.0547, -0.3563, -0.0369,  0.4944,  0.1287,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3956,  2.3705, -0.3048, -0.4739,  0.1426, -0.0813, -0.1909,  0.0351,
        -0.1462, -1.3480, -0.0102, -0.1440,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6336e-01, -2.0868e+00,  3.4606e-01,  7.2527e-01, -2.7068e-02,
         1.7151e-01,  7.9885e-02,  3.7734e-02,  1.2408e-01,  4.1830e-02,
        -2.0220e-02,  8.5506e-02,  4.7271e-02,  4.7582e-02,  1.4489e-01,
         4.1329e-02,  1.2952e-01,  1.1707e-02,  1.0811e-01, -9.5107e-03,
         2.5175e-02,  1.1308e-01,  2.7774e-02, -1.4510e-01,  2.3167e-03,
        -5.2685e-04, -4.5488e-02,  2.0951e-01,  2.7329e-02,  1.1779e-01,
         8.1452e-02,  3.1930e-02, -1.1473e-01, -1.0335e-01,  6.1784e-02,
         4.1600e-03,  9.6525e-03,  3.0654e-02, -1.2308e-02, -1.2579e-02,
        -6.2231e-03, -3.9017e-03, -8.8245e-02,  2.9550e-02, -5.5823e-02,
        -1.4967e-01,  8.0341e-03, -6.7813e-03,  2.9856e-02,  4.0994e-02,
        -6.0903e-02,  4.7744e-02,  3.7533e-02,  1.8403e-02,  1.1583e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-4.5572, -5.8433, -1.0838, -0.2160,  0.9248,  0.4868, -0.0751, -0.0369,
        -0.2105,  0.2575,  0.2522,  0.2778,  0.2150, -0.1700,  0.5774,  0.1185,
         0.5844, -0.1012,  0.0485,  0.1579, -0.2284,  1.2361, -0.9413,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0646, -3.5644, -0.8918,  0.4142,  0.0472, -0.0584, -0.1176,  0.6751,
         0.6817, -0.0706,  0.3701,  0.2635,  0.1779,  0.0794,  0.0318,  0.0405,
         0.3313, -0.3143,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6905, -1.9998,  0.5754,  0.3356,  0.3197,  0.2390, -0.4020,  0.0348,
         0.1058,  0.1541,  0.0328,  0.2005,  0.1191,  0.0547, -0.0123, -0.0400,
         0.1187, -0.1473,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5000,  1.8492,  0.0454, -0.1545, -0.0084,  0.0806,  0.0913, -0.0995,
        -0.0452, -0.0248,  0.0916, -0.1062, -0.0049,  0.0369, -0.1050,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0252,  3.0799,  0.4119,  0.6908,  0.3491,  0.0893,  0.0335,  0.1081,
         0.0354,  0.0212, -0.3498, -0.3332,  0.2125, -0.2802, -0.5889, -0.9154,
         0.1753,  0.3740,  0.2329,  0.1188, -0.0095,  0.0110, -0.0065,  0.0666,
        -0.1116, -0.1530, -0.0664, -0.0383, -0.3847,  0.2582,  0.4796, -1.0774,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6785, -2.8901, -0.0583,  0.2750,  0.0849, -0.0191, -0.0791,  0.0658,
         0.2009,  0.0759, -0.1654,  0.0765,  0.1797,  0.0738, -0.0150,  0.1661,
         0.0043, -0.1317,  0.2061,  0.2034, -0.4444,  0.0817,  0.0916,  0.2681,
         0.1717,  0.2855,  0.0665, -0.0714,  0.1943,  0.2111,  0.1724,  0.4942,
        -0.4654,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9863e-01,  2.4483e+00, -2.8915e-02,  2.3716e-01, -1.2663e-01,
        -5.3005e-01,  4.0035e-03, -3.0135e-02,  3.5864e-02,  1.5352e-01,
        -6.2799e-02, -2.2680e-01,  8.9107e-02, -3.9959e-01, -3.6024e-01,
         4.6343e-01,  6.7504e-02,  1.8919e-02, -1.1421e-02,  5.4277e-02,
        -3.5424e-02,  3.2207e-02, -1.6346e-02,  4.1328e-02, -9.7364e-03,
        -5.7954e-02,  1.5720e-01,  2.3485e-01, -1.8537e-02, -1.7178e-01,
         1.2657e-01,  7.8587e-02, -2.3374e-02,  1.2125e-01,  1.5379e-02,
        -1.8281e-01, -3.7767e-02,  7.9164e-02, -2.8881e-04,  6.4681e-02,
         4.5471e-02, -1.3519e-02,  2.5435e-02, -1.6230e-01, -4.6112e-02,
         1.2605e-01, -1.3091e-01, -6.2046e-02, -1.4116e-01, -2.8835e-02,
        -2.5719e-01, -9.1212e-02, -5.6044e-02, -2.3296e-02, -1.7422e-01,
         1.3842e-02, -2.0424e-01, -1.0053e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2931, -2.5263, -0.9223, -0.1146, -0.1332,  0.0383,  0.1264, -0.0350,
        -0.1329,  0.0597, -0.0776, -0.0262, -0.1652, -0.0782, -0.0134, -0.0310,
         0.1186,  0.0515,  0.0602,  0.0316,  0.0917, -0.0117,  0.0738,  0.2236,
         0.1637,  0.2879,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9033e-01,  1.3455e+00, -8.4484e-02, -2.8791e-03,  7.5816e-02,
        -3.9439e-02, -2.7482e-02,  4.7120e-02, -1.7482e-02, -6.5689e-03,
         9.0515e-04, -3.3768e-02,  5.6593e-02,  4.1487e-03, -1.7602e-02,
         1.6862e-02, -5.0299e-02,  1.3258e-02,  2.8139e-02,  1.4988e-01,
         4.1223e-02,  9.8293e-02, -7.6699e-02,  2.6073e-01, -1.6607e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1210e-01, -3.3549e+00,  4.2548e-01,  1.6186e+00, -2.7886e-01,
        -3.0137e-01, -3.9346e-02,  2.2142e-01,  2.7137e-01, -1.8566e-01,
         3.9042e-01, -2.7861e-01, -1.1076e-02,  1.4673e-01, -9.1555e-02,
        -1.1988e-01, -3.5253e-01, -7.6161e-02,  7.0667e-02, -3.5094e-02,
        -1.4477e-01, -1.8407e-01, -1.1722e-02, -3.0731e-01, -8.0215e-02,
         2.3214e-01,  1.4396e-01,  1.3266e-02, -1.3496e-01,  4.0933e-02,
        -6.4779e-02, -5.2006e-02, -1.6851e-01,  1.0366e-01, -2.5052e-01,
        -3.3197e-02, -1.1619e-01,  1.0406e-02, -2.5819e-02, -4.6884e-04,
        -2.9085e-02,  1.5711e-01, -7.1704e-02,  1.2646e-01,  6.5822e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3882,  3.2051, -0.2910,  0.4354,  0.0289,  0.2912,  0.2986, -0.0049,
        -0.2014,  0.0365, -0.1420,  0.1630,  0.0659,  0.0621,  0.1585, -0.4509,
         0.2426, -0.0544,  0.0170,  0.0300,  0.1868,  0.1622, -0.0619,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1768, -2.4427,  0.6138, -0.8701, -0.2289, -0.2316, -0.1846, -0.1194,
         0.1878,  0.2126,  0.1190,  1.0261,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.0210,  2.6796,  0.2898,  0.1066, -0.2390, -0.0106, -0.1438,  0.0532,
         0.0608, -0.0605,  0.0517,  0.0535, -0.0215,  0.1993, -0.1596, -0.0396,
        -0.0300,  0.3148,  0.0134, -0.0436,  0.1983, -0.0942, -0.6509,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1987e-01,  5.3262e+00,  3.3511e+00,  6.7279e-01, -1.2266e-01,
        -1.1750e-01,  6.8422e-02, -4.9793e-02,  2.3895e-01,  2.4483e-01,
         2.1665e-01,  1.2248e-01,  2.0494e-01, -1.3663e-01, -6.6720e-01,
         2.7168e-02, -1.2481e-01,  4.0500e-01,  2.0712e-02,  6.0432e-02,
        -2.8533e-01, -1.4958e-01, -1.3075e-01, -1.0669e-01,  4.7053e-03,
        -3.5853e-01,  2.4197e-01, -2.6604e-02,  2.6765e-01,  9.3247e-02,
         3.1046e-01, -4.5348e-01,  1.5773e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0211, -0.2234, -0.0516, -0.2076, -0.1486,  0.0135, -0.0613,  0.0395,
         0.1232,  0.0107,  0.0941,  0.0642, -0.0951, -0.0900,  0.0249, -0.0241,
         0.0396, -0.0666,  0.2228,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7409, -6.5152, -0.6301, -0.9280, -1.0105, -0.0094,  0.1956, -0.0091,
         0.2724,  0.2434, -0.0515, -0.5260,  0.4654, -0.9646,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2892, -2.9595, -0.3238,  0.0076, -0.0362,  0.0982,  0.1435,  0.0225,
        -0.0042,  0.0390, -0.1514,  0.0485,  0.0353,  0.6038, -0.1358,  0.0491,
         0.2345, -0.0213,  0.0328,  0.0293, -0.2666, -0.6840,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3669, -2.1289,  0.0864, -0.2610, -0.0395, -0.1316, -0.0880,  0.3049,
         0.0033,  0.2468, -0.1248, -0.0802,  0.1980,  0.0363,  0.0601,  0.1532,
        -0.0687, -0.1782, -0.0649,  0.1714, -0.1247, -0.0537,  0.1189,  0.1069,
        -0.0853,  0.0868, -0.0364, -0.0071, -0.0591, -0.3022,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4466,  2.7412, -0.5284, -0.6210,  0.5557,  1.1306, -0.7774, -0.5624,
        -0.1354,  0.0760, -0.2593, -0.2275, -0.6660,  0.3187,  0.4374, -1.0144,
        -2.3254,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2797, -3.9370, -0.3957, -0.5167,  0.3619,  0.3432,  0.1406,  0.4516,
         0.0164,  0.1556,  0.3435,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2726, -4.7622, -0.5418,  0.1657, -0.3041,  1.0677, -0.2526,  0.0649,
         0.1868,  0.1219, -0.4482,  0.0209, -0.2721, -0.1305,  0.7461, -0.0306,
        -1.4758,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6315,  3.7965,  0.5633,  0.1979,  0.0178, -0.0688,  0.1223, -0.0193,
         0.0778, -0.1105,  0.2486, -0.1562,  0.1021,  0.2883, -0.4615,  0.0374,
        -0.0445, -0.0372, -0.4875,  0.1597, -0.1547,  0.0139, -0.0827, -0.0497,
        -0.4509,  0.1014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4746e-01,  2.8297e+00,  3.3548e-01, -9.1026e-02, -1.4333e-01,
        -1.1560e-01, -1.7832e-01,  2.0775e-01, -2.3629e-01,  2.2512e-02,
         1.2774e-03,  1.1632e-01,  2.5828e-03, -3.3535e-03, -2.8717e-02,
        -2.3587e-01, -1.0698e-01,  2.1779e-02, -6.1410e-02, -3.2204e-02,
        -2.1375e-01, -7.1277e-02, -6.4132e-02, -2.1249e-02,  4.8429e-02,
        -6.0023e-04, -3.3091e-02, -5.0042e-02, -2.5895e-01,  9.9044e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5151, -3.2305,  0.1394,  0.2787, -0.9454, -0.2212, -0.0742, -0.5011,
        -0.0845,  0.8260,  0.0730, -0.3004, -0.2878,  0.0186, -0.3088, -0.0518,
        -0.0045,  0.6114,  0.2658,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0508,  2.4690,  0.2872,  0.0092, -0.0744, -0.1932,  0.0352, -0.0293,
        -0.0649, -0.0305, -0.0251, -0.0751, -0.0575, -0.0189, -0.1442, -0.0877,
         0.0547, -0.1602,  0.0139,  0.0107,  0.0290, -0.0237,  0.0668, -0.0271,
         0.0572, -0.6920,  0.0874,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9215,  2.5404,  0.0950, -1.1563,  0.4103, -0.0460, -0.6238, -0.1975,
        -0.0387,  0.2636, -0.5259, -0.1381,  0.7912,  0.5368,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2010,  3.7038,  0.3270,  0.2353, -0.1201,  0.4530,  0.0987, -0.3442,
        -0.4133,  0.1684,  0.0604,  0.0395,  0.0961,  0.2135,  0.4446,  0.0949,
        -0.1375, -0.0297,  0.3451,  0.1477,  0.0266,  0.1024, -0.1710,  0.2450,
         0.4017,  0.0337,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5108,  2.1625,  0.3203, -0.1682, -0.1998, -0.2247, -0.0932,  0.0471,
        -0.5175,  0.3411,  0.0361, -1.2155,  0.7176,  0.1352,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2257, -3.7072,  1.3423,  0.1297,  0.2221,  0.4515,  0.1740,  0.2819,
         0.1528, -0.0968, -0.1588, -0.0839,  0.1040,  1.7425,  0.3572,  0.2113,
         0.6350,  0.0786, -0.0556,  0.0226,  0.1870,  0.1780,  0.2226,  0.0666,
        -0.1384,  0.1743, -0.0065, -0.0585,  0.6660, -0.3230,  0.2084, -0.1952,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8354e-01,  3.1757e+00, -2.2085e-01,  8.4670e-02,  1.9181e-01,
        -1.4550e-01, -5.2797e-02,  3.1389e-01,  1.4518e-01, -6.0906e-04,
        -1.5372e-01,  8.5677e-02,  5.0161e-02, -4.4918e-02,  5.9375e-02,
         6.0121e-02, -2.4808e-01,  9.5271e-02,  9.6433e-02,  4.5604e-02,
        -6.6894e-02,  4.1607e-02,  3.8662e-02,  4.4335e-01, -1.5086e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2028,  3.0441, -0.7601, -0.0491, -1.0806, -0.5678,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5965,  2.4167,  0.3007, -0.0178, -0.3051,  0.0352, -0.0908,  0.0323,
        -0.0509, -0.0755, -0.0960,  0.0518,  0.0725,  0.0733, -0.0583, -0.1081,
        -0.0300,  0.0385, -0.0694, -0.1066, -0.1068, -0.0456,  0.1031, -0.0504,
         0.0038,  0.0927, -0.0027, -0.1510, -0.0032,  0.0088,  0.0082,  0.1608,
         0.0315,  0.0340, -0.0849,  0.2648,  0.0421], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8842, -8.6317, -0.2947, -0.5661,  0.1848, -0.3747, -0.1041, -0.2150,
        -0.0371,  0.0905,  0.0540,  0.0965,  0.0955,  0.0662,  0.0600, -0.0341,
        -0.2494,  0.0853,  0.1089,  0.1456,  0.6053,  0.0539,  0.0775, -0.2058,
         0.2463,  0.2320, -0.1054, -0.1546,  0.1830,  0.1153,  0.0407,  0.0535,
         0.2149,  0.1542,  0.0302,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.3249, -11.3714,  -2.2204,   2.7723,  -0.6521,   0.8589,  -0.4289,
          0.1744,   0.1151,   0.4289,   0.1487,   0.8478,   0.8644,  -3.3413,
          0.8363,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9609e-01, -5.1205e+00, -7.2016e-01,  8.6980e-01,  1.5305e-01,
         1.5632e-01, -3.1851e-01, -1.1335e-02, -3.4564e-01,  4.0222e-03,
         1.2654e-01,  2.0231e-01,  1.5113e-01,  1.0032e-01, -7.6149e-02,
         3.6956e-01,  7.8965e-02, -1.0523e-01,  2.3768e-01, -5.9388e-02,
        -9.3343e-02, -2.2503e-02,  8.6318e-02,  4.7443e-02,  6.6025e-02,
        -8.4759e-02,  4.3552e-02, -4.5861e-02, -4.5286e-02,  1.3606e-01,
        -2.5709e-02,  1.1324e-02,  1.3424e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4378, -4.6214,  0.0788,  0.0969,  0.1901, -0.1663,  0.1093,  0.0327,
         0.0536,  0.0320, -0.0613,  0.2616,  0.2405,  0.4944,  0.3326, -0.0240,
        -0.0231, -0.0946, -0.0488, -0.0587,  0.1565, -0.0058,  0.1077, -0.0204,
         0.0516, -0.0537,  0.4703,  0.3026,  0.0551,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.2491,  5.7205, -1.1679, -0.7621, -0.8269, -0.2671,  0.2204,  0.0463,
        -0.2575, -0.3543,  0.3234, -0.0544, -0.2225, -0.4262, -0.1457,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3385e-01, -2.2913e+00, -2.0586e-01,  6.3678e-01,  1.9949e-01,
         5.0077e-02,  6.5755e-02, -8.4495e-03, -3.8240e-01,  3.3699e-01,
         8.0100e-02,  4.3065e-02,  8.0021e-02,  2.4772e-01, -5.6403e-02,
        -2.3297e-01,  7.0706e-02,  5.0945e-03,  2.0368e-02, -5.8465e-03,
        -1.2562e-02, -4.7626e-02,  2.0116e-02,  2.3968e-02,  2.9657e-02,
        -9.2354e-03,  3.5476e-04,  4.0494e-02,  3.6787e-01, -4.0298e-02,
         2.7944e-02, -1.8237e-02,  3.6852e-02,  2.6757e-02, -6.9681e-02,
         4.7979e-03,  2.5109e-02, -3.2111e-03, -4.4084e-02, -5.2048e-02,
        -2.5184e-02,  4.7668e-02,  1.1744e-02,  7.8465e-02, -9.2786e-02,
         1.2921e-01, -6.3764e-03,  4.0267e-01, -2.7071e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5541,  2.6871,  1.7314,  0.2228, -0.0035,  0.1438,  0.1683,  0.1500,
         0.2073,  0.4196,  0.0331, -0.0303, -0.0113, -0.0468,  0.0404, -0.0499,
         0.2005,  0.0293,  0.1108,  0.0039,  0.1108,  0.1291, -0.0044,  0.0106,
        -0.0814, -0.1522, -0.0513,  0.0878,  0.1140, -0.0980, -0.0251,  0.0273,
        -0.0669,  1.0231,  0.3362,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7742e+00,  5.2828e+00,  1.7519e-01,  1.2690e-01, -2.4406e-01,
        -2.5783e-02,  1.8748e-01, -9.9400e-02, -9.1230e-02, -2.3866e-01,
        -1.5251e-01, -6.6333e-02, -1.9444e-03, -1.0582e-01,  5.4989e-02,
        -1.0847e-01, -5.9270e-03, -1.1434e+00, -3.1996e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5224,  6.2065,  0.0546, -0.1865,  0.1295,  0.0126,  0.3187,  0.0884,
         0.7931,  0.0614, -0.5631,  0.2991,  0.2932, -1.3221, -1.7441,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4236, -0.9057, -0.4291, -0.0507, -0.0655, -0.4144, -0.0479,  0.0238,
        -0.0339, -0.1513,  0.0822, -0.1227, -0.2461, -0.0072, -0.1711, -0.0847,
        -0.0323,  0.1078, -0.2369, -0.1885,  0.0817, -0.0100,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1922e-01, -3.9982e+00, -9.0348e-02,  2.6087e-01, -9.1026e-02,
         2.4280e-03,  2.4762e-01, -1.2332e-01, -3.0978e-01, -1.3388e-01,
        -3.6062e-02,  1.6312e-01,  9.0682e-02,  2.6234e-01, -3.0612e-02,
        -4.6657e-02, -1.2296e-01, -1.7860e-01, -1.9195e-01, -5.1443e-02,
        -4.4312e-03, -2.6494e-02,  1.5051e-01, -1.6522e-01, -4.1134e-02,
         1.2287e-01,  1.9728e-01, -9.6333e-02,  3.4201e-01,  1.9808e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5379, -3.3230,  0.7748,  0.2599,  0.1013,  0.4508,  0.2257, -0.4090,
         0.0630,  1.3197, -0.0197,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8724e-01, -3.3542e+00,  5.4343e-01, -1.9861e-01, -2.3702e-01,
        -3.0249e-04, -3.3307e-01, -8.3877e-02, -2.0249e-02,  1.1229e-03,
         1.0927e-01, -1.2240e-01, -2.0211e-01, -9.2360e-02,  3.5852e-02,
        -1.4674e-01,  1.5496e-01,  4.1545e-02,  1.0761e-01, -5.0334e-02,
         5.8012e-02,  5.6295e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4838e+00, -4.9226e+00,  9.8597e-01, -7.2244e-02, -2.8938e-01,
         8.2144e-01, -3.0572e-02, -1.3015e-01,  2.5692e-01, -1.8427e-03,
         4.7472e-01,  1.3012e-01, -6.8244e-02,  5.1239e-02,  3.3231e-02,
         1.4132e-01,  2.9939e-02,  1.8046e-01, -5.0886e-03, -9.4550e-02,
         1.0759e-02, -1.1658e-01, -3.1457e-01, -4.9413e-02, -6.5672e-02,
         7.3280e-02, -2.1288e-01,  2.3666e-01,  7.7202e-02,  1.2794e-01,
         1.2098e-01, -2.9560e-01, -2.7056e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7607, -7.2716, -2.0288,  0.4897, -0.0917, -0.1261, -0.2795, -0.1026,
        -0.1040,  0.3460,  0.0332, -0.2947,  0.1442,  0.2275,  0.2735, -2.3738,
         0.0378, -0.3179,  0.0271,  0.2539, -0.3849,  0.1131, -0.0847,  0.1731,
        -0.0732, -0.3707,  3.2862,  0.7241,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5278, -2.3622, -0.6083, -0.2953, -0.0658, -0.1700, -0.1185, -0.4811,
        -0.8870, -0.2223,  0.0247,  0.0489,  0.0378, -0.0202, -0.0644, -0.1727,
        -0.3094, -0.0627, -0.2392, -0.0945,  0.0197, -0.1998,  0.0175, -0.2600,
         0.2333,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1881, -5.4890, -0.7922, -0.0526,  0.2531, -0.0229,  0.0402,  0.0325,
         0.1128,  0.2193,  0.1180,  0.1273, -0.0088, -0.0820,  0.1087,  0.1706,
         0.2499, -0.1167,  0.0339,  0.0745, -0.0581,  0.0515,  0.1154,  0.2255,
        -0.2393, -0.0440,  0.4871, -0.1433,  0.2025,  0.0505, -0.0456,  0.0797,
        -0.0893,  0.0890,  0.0163,  0.2594, -0.0248, -0.1034,  0.0542, -0.4812,
         0.0628,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8337,  2.4297,  0.2437, -0.2841,  0.2962, -0.2011, -0.0030, -0.1224,
         0.1734, -0.1719, -0.1012,  0.2510,  0.1666, -0.0502,  0.0427, -0.0391,
        -0.0718, -0.0057,  0.0038,  0.0969,  0.0242, -0.2053,  0.0246, -0.1507,
        -0.0540,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1850,  2.7093, -0.2752, -0.4594,  0.1057,  0.1824,  0.3346, -0.1200,
         0.0670,  0.0253, -0.0451, -0.0392, -0.2054, -0.0271,  0.0662,  0.1970,
         0.2799, -0.1072,  0.1368,  0.2251, -0.0088,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0685,  2.4252,  0.6319, -0.0281, -0.0858,  0.1184, -0.0540, -0.0861,
        -0.2047, -0.5644,  0.4822, -0.1739, -0.0625, -0.2164,  0.0308, -0.1792,
        -0.1201, -0.4273,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4692,  2.4160, -0.3038,  0.6666,  0.0250,  0.2077,  0.1182, -0.2671,
        -0.1257,  0.1474, -0.0175, -0.0180, -0.0112,  0.0285, -0.1047,  0.0588,
        -0.1032, -0.0289,  0.1105, -0.1256,  0.0112, -0.1648, -0.0859, -0.0653,
         0.1423,  0.1057,  0.0314,  0.0281,  0.0035,  0.0231,  0.0309,  0.0325,
        -0.0539, -0.0385,  0.0895,  0.0685,  0.0531,  0.1057,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2455,  1.6206, -0.0342, -0.0725, -0.1448, -0.0036, -0.0248,  0.0677,
        -0.0418,  0.0248, -0.1496, -0.0536, -0.0432,  0.0523, -0.0622,  0.1171,
        -0.0464, -0.4356,  0.1186, -0.1738,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7629e+00, -6.1507e+00, -2.6535e+00, -2.4259e-01,  2.4742e-01,
         3.8213e-02,  6.1020e-03, -6.7535e-01,  5.6809e-02,  8.4783e-02,
         2.5257e-01,  2.7758e-01,  1.5904e+00, -2.8587e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6986, -5.0698,  0.3111,  0.3380,  0.4218, -0.1994, -0.2403,  0.1504,
         0.1126, -0.2149, -0.0127,  0.2653, -0.0917, -0.0443, -0.0929,  0.3672,
        -0.2951,  0.0230,  0.1450, -0.0261,  0.0611, -0.5635, -0.1915, -0.5139,
        -0.2376, -0.1288, -0.1116, -0.1955, -0.1097, -0.2178, -0.0264,  0.0051,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9675e+00,  6.6410e+00,  1.7087e-01,  6.9737e-01, -4.2193e-01,
         2.8867e-01,  3.4997e-01, -3.2986e-01,  6.1059e-02, -7.5861e-02,
        -1.2321e-01, -1.7336e-01,  3.9483e-02,  1.7818e-01,  1.1355e-01,
         6.1301e-02, -6.3411e-02,  3.6263e-03,  4.2648e-01, -1.1024e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5332e-02,  1.6660e+00,  3.8505e-01,  6.5118e-02,  1.2046e-01,
        -1.0916e-01, -4.1464e-02, -4.1250e-02, -1.2080e-01, -9.8954e-02,
         4.3482e-02, -5.4116e-02, -1.9899e-02,  2.9848e-02, -3.3491e-04,
         2.3314e-03, -7.2362e-02, -4.4542e-02, -6.9333e-02, -8.9200e-02,
        -1.0743e-01,  3.8258e-02,  1.7023e-01,  8.0982e-02, -1.7652e-01,
         3.5179e-02, -6.1638e-02,  1.5763e-02, -1.6993e-01, -5.5634e-02,
         4.9349e-02,  1.7314e-02, -5.1955e-02,  2.8339e-03, -4.7332e-02,
        -5.5126e-02, -5.2034e-02, -1.6773e-02, -2.2902e-02, -3.5300e-02,
         3.6570e-02, -1.3989e-01, -5.0042e-02,  6.4036e-03,  5.4214e-03,
        -1.0944e-02, -9.6326e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0563,  1.7231,  0.3044, -0.0763, -0.1647,  0.1624,  0.0192, -0.0485,
         0.3096, -0.0036,  0.0870,  0.2000, -0.1388,  0.7870,  0.0261,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2859e-02, -6.1957e+00,  2.5800e-01,  1.8584e-01, -1.2294e-02,
         9.3305e-01, -2.9022e-01,  3.7156e-01, -6.8693e-01, -7.9124e-02,
        -4.3198e-01, -9.7127e-02, -4.6384e-02, -5.6350e-02, -1.4408e-02,
         3.2280e-01,  1.7251e-01,  1.9792e-01,  1.0127e+00, -1.6734e-01,
        -1.2663e-01,  6.8414e-02, -1.9498e-02, -3.4940e-02, -1.3803e-01,
         3.3666e-01, -2.8871e-01, -4.5037e-01, -1.3062e-01,  3.7863e-02,
        -1.0300e-01,  7.2696e-03, -6.8917e-02, -5.5794e-03, -3.8920e-02,
        -1.4085e-01, -5.4070e-02, -9.9396e-02,  1.5386e-01, -1.1909e-01,
        -4.3761e-02, -6.1547e-02,  1.6596e-01, -7.0203e-02,  9.0952e-02,
         7.4217e-02, -4.3432e-01, -7.0737e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 1.4257e-01,  4.1596e+00,  7.2833e-02,  3.5218e-01, -1.5935e-01,
        -1.7720e-01,  1.2528e-01,  4.1864e-01, -4.1837e-01, -2.6472e-01,
         2.6733e-01,  2.3361e-01,  9.1937e-02, -3.5383e-02, -1.0072e-02,
        -1.6172e-03,  6.2588e-02,  1.7962e-01, -6.5186e-02, -1.6307e-02,
         1.1527e-02, -3.0171e-01, -1.5092e-01, -4.3110e-01,  4.9079e-01,
        -6.9098e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4835,  2.3862,  0.1903, -0.1055,  0.0818,  0.0642,  0.1189,  0.1351,
        -0.0467,  0.1544, -0.0050, -0.0650, -0.0189,  0.0507, -0.0374, -0.0405,
         0.0273,  0.0292,  0.1002,  0.3460, -0.0152,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6328e-01,  1.8958e+00, -3.0850e-01, -7.1349e-01, -1.2107e-01,
        -4.7433e-02,  5.6804e-02, -2.4546e-02, -8.0242e-02, -1.0199e-01,
        -1.2645e-02,  9.1747e-03, -8.5576e-02, -8.8186e-02, -6.3244e-02,
        -1.6559e-02, -2.5334e-02, -2.3519e-01,  3.3683e-01,  4.2637e-02,
         8.9576e-02,  1.4270e-02,  3.8597e-01,  1.0473e-01,  2.1909e-01,
         1.3566e-02, -1.2979e-01, -6.5589e-02,  2.3210e-03,  7.2733e-02,
        -7.1301e-03,  4.0671e-04,  1.0196e-01, -7.6241e-02, -1.1893e-01,
        -1.9118e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5476,  2.0912,  0.5966, -0.1672, -0.0512, -0.1455,  0.5359, -0.1577,
         0.6202,  0.0363, -0.0405,  0.1005,  0.0413, -0.0312, -0.1043,  0.1123,
         0.0737, -0.0075, -0.0131, -0.4531,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6372e-01,  2.3987e+00,  2.4227e-02, -4.0475e-02, -8.1979e-03,
        -8.3647e-04,  3.0690e-02,  9.4078e-02, -1.9144e-01, -8.2920e-02,
         3.3694e-02, -9.7393e-02, -4.7497e-02, -8.4287e-02, -9.3030e-02,
        -1.2658e-01, -7.1882e-02, -6.5563e-02, -1.0701e-01, -2.6048e-01,
        -2.7691e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0861,  1.8294,  0.1890, -0.1180,  0.0320,  0.1080, -0.0307,  0.1682,
        -0.0396, -0.0639, -0.0296, -0.1342,  0.1306,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3095,  2.1456, -0.4003,  0.0857,  0.0338, -0.0378, -0.2432,  0.0434,
        -0.0235,  0.1232,  0.1490, -0.0507, -0.0891, -0.0075,  0.2250, -0.0027,
        -0.1441, -0.1015,  0.0998, -0.1406,  0.0842,  0.1110,  0.1039,  0.2185,
        -0.0419, -0.0864, -0.0542,  0.1096,  0.1257,  0.0876,  0.0037,  0.0442,
         0.1220, -0.0284, -0.0435, -0.0447,  0.0069], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0402e-01, -3.9207e+00,  6.7118e-01, -2.4244e-01,  1.9819e-01,
        -1.2115e-01,  9.1659e-02, -4.0275e-01, -3.8158e-03, -3.7459e-02,
        -1.0893e-01, -2.1898e-02,  2.1707e-01,  1.2046e-01, -1.6951e-01,
         1.4055e-01, -2.0116e-02,  1.9058e-01,  6.2903e-02,  1.3950e-01,
         3.0868e-01,  2.7814e-01, -2.1002e-01,  5.6006e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2575,  5.0095, -0.3562, -0.0854, -0.4022, -0.1433, -0.4538,  0.1631,
        -0.4587,  0.5151, -0.0808, -1.1309, -0.6748, -0.1483, -0.2714, -0.3573,
        -0.3286,  0.2121, -0.1068,  0.2248, -0.4519, -0.8055, -0.1821,  0.1874,
        -0.3024, -0.1708, -0.1323, -0.1690,  0.1021,  0.4014, -0.1510, -0.2296,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2659,  2.6080,  0.1834,  0.2252,  0.0953,  0.0351, -0.2321, -0.5201,
         0.1619,  0.0210,  0.0759,  0.0145, -0.1344, -0.0337, -0.0411,  0.1386,
        -0.0941,  0.1447,  0.0952,  0.0745,  0.0798, -0.0259,  0.1059, -0.0175,
        -0.0991, -0.0941, -0.1559,  0.1779,  0.0486, -0.2938, -0.2242,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1075,  4.2408,  0.5289,  0.0403,  0.5049,  0.2775, -0.0658, -0.1701,
         0.2590,  0.0468, -0.1717,  0.1072,  0.3564, -0.2846,  0.2064,  0.3118,
         0.0366,  0.1700,  0.1352,  0.0905,  0.3019,  0.4315, -0.2164,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2440e-03,  4.0672e+00,  7.7116e-01,  8.2595e-01, -1.5529e-01,
         2.1188e-01, -1.8398e-01,  1.9715e-01, -8.3026e-02, -1.9631e-01,
        -1.5690e-02, -7.1621e-02, -6.0093e-02,  1.5019e-01,  6.4698e-02,
        -2.2640e-02, -1.8887e-01, -9.0395e-02, -1.5234e-01, -3.9018e-02,
        -4.4639e-02, -4.6490e-02,  4.9340e-02, -4.6248e-02,  1.1023e-01,
        -2.4255e-04, -1.3045e-01,  6.7799e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.3365,  5.4046, -0.0331,  0.3236,  0.0268,  0.0724,  0.1049,  0.0797,
        -0.3022, -0.2355, -0.3397, -0.1723, -0.1195, -0.1383,  0.0521, -0.0570,
         0.0817, -0.4045, -0.2447,  0.0431, -0.1733, -0.0114, -0.1475,  0.0385,
        -0.0769,  0.1050,  0.1705, -0.9702, -0.3947,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6331, -1.5830, -0.0669,  0.2113, -0.2231,  0.0830,  0.0690,  0.0591,
         0.2037,  0.2390,  0.1693, -0.0082,  0.0153,  0.1538,  0.1342, -0.0218,
        -0.0250, -0.0701,  0.0032, -0.1428,  0.0696,  0.2033,  0.7437,  0.1609,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9191,  8.5746,  1.9390, -0.2451,  1.2251, -0.8029,  0.7222,  0.7977,
         0.4861, -0.1346,  0.4855, -0.3221, -0.3555,  0.0740, -0.3437, -0.3754,
        -0.8663, -0.3055, -0.4624, -0.0475,  1.1124, -0.1696,  0.0590, -0.3373,
         0.2312, -0.4073, -0.1122, -0.1206, -0.1295,  0.0687, -0.7363, -1.2836,
        -1.8868,  0.0888], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6310e-01, -4.3620e+00, -4.0563e-01,  9.7351e-02, -6.1930e-02,
        -1.3483e-01, -2.7347e-01, -2.9008e-01, -7.1787e-02, -1.2465e-01,
        -2.9078e-01, -3.3857e-02,  3.7729e-02, -1.9766e-01, -2.9123e-01,
         3.0293e-01, -6.0741e-02, -2.0419e-01,  1.5787e-01, -1.3762e-03,
         4.4174e-01, -5.3402e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6007, -5.7447, -0.7050,  1.4169,  0.9703,  0.6747,  0.3094, -0.7509,
        -0.2294,  0.1476, -1.8393,  0.0974,  0.2221, -0.1734,  0.3155,  1.1316,
         1.1006, -0.2807,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1399e+01,  3.2284e+00, -2.1893e+00, -1.2632e+00, -2.0244e+00,
        -9.1503e-02,  2.1548e-01, -3.8202e-02, -6.0425e-01, -9.8102e-03,
         3.1732e-01, -5.8095e-01,  7.1937e-02,  3.2490e-01, -2.9392e-01,
         1.6073e-01,  4.8639e-01,  1.9959e-01,  4.7190e-01,  2.4497e-01,
         3.6029e-01, -7.7015e+00, -8.9510e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6729, -1.4747, -0.0359, -0.0842,  0.3255, -0.1374, -0.2816, -0.2020,
        -0.1811, -0.2864,  0.0398, -0.0297, -0.1321, -0.0152,  0.0083,  0.3404,
        -0.2048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1176,  4.3417,  0.9592, -0.3724, -0.4981,  0.3815, -0.1185, -0.2311,
        -0.2940, -0.1893, -0.0320, -0.3395, -0.4768, -0.2184,  0.2929, -1.5074,
        -0.3293,  0.2844,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3238, -5.7930,  1.6325,  0.1031,  1.1887, -0.1695,  0.2058, -0.4965,
        -0.2575, -1.2742,  0.1549, -0.1594,  0.2958,  0.4273, -0.3591,  0.1610,
         0.0931, -0.4114, -0.3535,  0.5760,  1.0486, -0.0167, -0.3302,  0.6809,
        -0.4364, -0.6312,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8239,  2.7253,  0.6033,  0.0060, -0.0633,  0.4772, -0.2107, -1.0858,
         0.0219, -0.0789, -0.4030, -0.5748, -0.1206, -0.2914, -0.2301,  0.1793,
        -0.1199,  0.2283, -0.1167,  0.0719, -0.3988,  0.1325, -0.3568,  0.1652,
        -0.2315,  0.1580,  0.0870, -0.1486,  0.6418, -0.0621, -0.3918,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7293,  6.5371, -0.3896, -0.6694,  0.0400,  0.0878, -0.2302, -0.1021,
         0.4472,  0.5358,  0.0378, -0.2484,  0.2021, -0.0486, -0.0665, -0.1010,
        -0.1989, -0.0296,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0164,  8.1161,  0.0704, -1.1599,  0.3520,  0.4576, -1.2320,  1.8362,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.1056e+00,  5.6978e+00,  2.8618e-01,  1.5344e-01, -1.2288e-01,
         1.9515e-01,  2.2192e-01, -4.9718e-03, -1.0494e+00,  4.0032e-01,
        -1.0043e-01, -5.5385e-01, -5.2943e-02,  1.4165e-01, -2.3672e-02,
         3.1241e-01,  2.9682e-02, -2.0004e-01, -1.5136e-01, -3.9454e-02,
         2.6945e-02, -2.5809e-02, -2.6741e-01, -1.3810e-01, -1.8825e-01,
        -1.3945e-01,  8.3460e-02,  6.4261e-02, -3.6687e-02,  1.2584e-01,
        -2.9373e-01, -5.2796e-02, -1.6938e-01, -1.2680e-01, -1.0931e-01,
         4.0113e-02,  1.5878e-01, -1.4085e-01, -1.2791e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2012e-01,  4.5101e+00, -4.4061e-03, -5.3135e-02, -2.4942e-01,
        -1.0290e-01, -7.5542e-02, -1.5827e-01,  1.3293e-01, -3.6233e-01,
        -1.0296e-01, -9.5297e-02, -3.9283e-01, -1.9035e-01, -4.2328e-02,
         1.4638e-02,  1.1011e-01, -4.7128e-02,  9.3760e-03, -2.1012e-01,
        -6.0120e-01, -3.4134e-01, -1.5017e-01, -2.3315e-02, -1.2606e-01,
        -3.8608e-02, -7.0865e-01, -5.0821e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1008,  3.3952, -0.0089,  0.8735,  0.1676, -0.1495,  0.0849, -0.0977,
         0.0150,  0.0195,  0.0573,  0.0038, -0.5103,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3082,  3.6721,  0.1599, -0.0356, -0.1821,  0.0344, -0.2767, -0.1059,
         0.1327,  0.0987, -0.1315,  0.0712, -0.1771, -0.3251, -0.5303,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9065e-01,  5.9240e+00,  2.1636e+00,  5.4270e-01,  2.1785e-01,
         1.0577e-01, -8.5067e-02, -2.8406e-01, -1.1872e-01,  1.3025e-02,
         3.2557e-01,  2.0859e-01,  2.5976e-01, -1.9980e-03, -3.7878e-02,
         2.1116e-01,  5.9024e-02,  1.3649e-01, -6.3160e-02, -2.2381e-01,
         2.7887e-02,  1.0653e-02, -9.5190e-02,  1.6931e-01,  2.2223e-01,
        -3.2475e-01, -1.0535e-01,  2.3133e-02,  2.8512e-01,  1.6902e-01,
         4.3161e-02, -1.6975e-02,  1.1880e-01,  3.0906e-01,  2.0333e-01,
         3.7815e-01,  1.1175e-01,  2.3182e-02, -2.2096e-02,  2.2104e-02,
         4.1311e-02,  9.7896e-02, -1.0047e-01, -1.1440e-02,  1.3791e-01,
        -4.3159e-02,  7.0370e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3693,  4.2783, -0.7811, -0.0689, -0.5866, -0.5560,  0.1362,  0.2145,
         0.0589, -0.1472,  0.1941, -0.0468,  0.1807,  0.0721,  0.0588, -0.0273,
         0.1032, -0.4114,  0.0895,  0.0122,  0.1307, -0.0640,  0.0823, -0.0602,
        -0.0727,  0.0691, -0.1452,  0.0348, -0.1737,  0.1193, -0.0223, -0.0068,
        -0.1579,  0.0341, -0.0152, -0.0422, -0.1249, -0.0520, -0.0899,  0.1817,
        -0.2383,  0.8019,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.1146, -11.1153,  -0.8240,  -0.3230,   2.3535,  -0.2536,  -0.2551,
         -0.9519,   0.7683,   0.6129,   0.7126,   0.6962,  -0.4594,   0.0750,
         -0.2447,  -0.0638,   0.8471,  -0.4314,   0.3404,   0.4768,   0.3930,
          0.2123,   0.0931,   0.1258,   0.0945,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3025, -5.8006, -0.0172, -0.5919,  0.0601, -0.0785, -1.3856, -0.0476,
        -0.0093, -0.0552, -0.0928,  0.0633, -0.1050, -0.1592, -0.1149, -0.2132,
        -0.1466, -0.4982, -0.0506, -0.1916,  0.2397,  0.0571, -0.3066,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9752, -5.7362, -0.4122, -0.1790, -0.6167,  0.3528, -0.1515,  0.5118,
        -0.2102, -0.3518,  0.4069,  0.0772, -0.2037, -0.2095,  0.0744, -0.1051,
         0.1010, -0.0094,  0.2151,  0.0115,  0.0818,  0.2003,  0.1240,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1017, -5.6230,  1.3994,  1.7139,  0.9062,  0.0325, -0.1466, -0.1310,
         0.2943,  0.3412, -0.3865,  0.3425,  0.7037,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1295, -2.5208, -0.1954,  0.0458,  0.2592,  0.0327,  0.0563,  0.0050,
         0.1415,  0.4521,  0.2590,  0.3066,  0.7223, -0.0140,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2249, -5.8580,  0.7255,  1.1554, -0.4411, -0.2888, -0.4997, -0.5117,
        -0.1587,  1.0194, -0.0595, -0.3937, -0.0352,  0.0075, -0.3821,  0.1200,
         0.0764,  0.3707,  0.3989,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-3.5366,  4.8394, -0.7915,  0.7491,  1.0398, -0.0171, -0.1841,  0.1438,
        -0.3837,  0.0539, -0.0886, -0.1608, -0.0558, -0.3757,  0.3624, -0.2049,
         0.1468,  0.0163, -0.3600, -0.0303,  0.2092,  0.2302, -0.0679, -0.1603,
         1.4937,  0.8549,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3464, 14.9782,  1.3984,  0.0248, -1.4556,  0.8388, -0.6115, -1.1325,
        -0.5295, -0.7625, -1.0061,  0.0948, -0.5705,  0.0760, -1.9327,  0.2244,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6451,  4.9232,  0.1216,  0.6148, -0.0783, -0.2429,  0.1492,  0.2047,
        -0.4082,  0.2565, -0.0157, -0.0252,  0.0125, -0.0057, -0.0571,  0.2728,
         0.0609,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9656,  5.1263,  0.0705, -0.4716, -0.1028,  0.1600, -0.2891, -0.1902,
         1.3872,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1222, -5.5286, -1.6436, -0.2801,  0.3572,  0.0696,  0.1651,  0.3196,
         0.1336,  0.1092,  0.0960, -0.0950, -0.0178, -0.1114, -0.1616,  0.0292,
        -0.0182,  0.0176, -0.0075, -0.1560, -0.0831, -0.1112, -0.2059, -0.0723,
         0.0127,  0.0732, -0.1982, -0.0809, -0.1513,  0.0544, -0.0177, -0.0486,
         0.0269,  0.1200, -0.1942,  0.0230,  0.1315, -0.0980,  0.0982, -0.0926,
         0.1028,  0.0429, -0.1289, -0.3126, -0.2647,  0.3123], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4153,  2.5757, -0.9031,  0.0140,  0.7432,  0.3395,  0.2054, -0.0743,
         0.0453,  0.0944,  0.0176, -0.0332, -0.0741,  0.1268,  0.0937,  0.4285,
         0.0466,  0.2405,  0.0445,  0.0496,  0.0477,  0.0317,  0.2884, -0.0447,
        -0.0942, -0.0204,  0.3069,  0.0828,  0.0930, -0.1613,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7087, -7.9479, -0.3508, -0.5901,  0.3718,  0.9065, -0.9805,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3316, -6.0147, -1.1739,  0.5927,  0.1817,  0.2226, -0.2796,  0.1780,
         0.2538,  0.7045,  0.0412,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1955,  3.2370,  1.0766, -0.8591, -0.8755,  1.0141,  0.1044, -0.8465,
         0.6005, -0.0634, -0.0805,  0.0805,  1.2091, -1.6858,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4448e-01, -9.4539e+00, -1.5676e+00, -1.6114e-01,  1.1111e-01,
         4.7372e-01,  1.0757e-01, -1.2467e-01,  4.3996e-01,  1.9996e-02,
         7.0848e-02,  3.4964e-01,  1.5366e-01,  2.7136e-01, -1.5300e-02,
         2.6099e-01,  4.0559e-01,  1.0906e-01, -7.6158e-02, -8.5168e-02,
        -2.5379e-01,  9.4383e-02,  4.0229e-01, -4.3287e-02,  4.6186e-02,
         3.9265e-01, -2.8520e-02,  4.8326e-03,  1.5446e-01, -2.7100e-01,
        -1.9274e-01,  3.6721e-01, -1.8821e-01, -4.3185e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9878e+00, -5.4822e+00, -4.4163e-01, -1.4692e-01,  2.6276e-01,
        -1.5097e-01, -2.5848e-01,  1.3206e-01, -3.4265e-01,  8.7871e-01,
        -2.2760e-01, -1.2603e-01, -2.0351e-02, -1.8882e-01, -9.4256e-01,
         2.1418e-01, -2.9388e-01,  4.4606e-01,  5.2244e-01,  4.6927e-03,
        -1.8311e-01, -2.5893e-02,  2.2768e-02,  3.4479e-02, -1.9982e-01,
         5.5334e-02,  1.2742e+00,  3.4188e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1107, -7.4632,  0.1927,  0.7005,  0.1586,  0.0775,  0.1009,  0.2257,
         0.3900,  0.0474,  0.2369,  0.2498,  0.6313, -0.3186,  0.1669,  0.1289,
         0.1528,  0.0241,  0.4762,  0.1210,  0.2512, -0.0699, -0.4086, -0.6551,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 5.5722e-01, -2.2484e+00, -1.0308e-01, -1.2153e-01,  3.2922e-02,
         9.5692e-02,  1.0862e-01, -6.3748e-02, -1.6450e-01, -1.9876e-02,
         3.1630e-02,  8.0655e-03,  1.5586e-01, -7.2659e-04, -4.4190e-03,
         1.6640e-01, -3.5257e-01,  1.4421e-01,  2.6220e-01,  1.6260e-01,
        -7.0331e-02,  3.3237e-02, -9.9681e-02, -8.0755e-02, -2.0583e-02,
        -1.7578e-01,  9.4060e-03, -5.8293e-02, -3.0771e-02, -2.7686e-02,
         2.8092e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1977, -3.3816,  0.6814, -0.0519, -0.0798, -0.6185, -0.3339, -0.2053,
        -0.2951, -0.0894, -0.5197,  0.6267,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4886, -3.6869, -0.5345,  1.4521,  0.0555, -0.0949, -0.1888,  0.0406,
        -0.0579,  0.3591, -0.1132,  0.1249, -0.1089,  1.2179,  0.2079,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0752, -2.3245,  0.1729, -0.1693, -0.2428, -0.0523, -0.2380, -0.0235,
         0.0252, -0.1563, -0.1025,  0.1284, -0.0744,  0.0195, -0.0295, -0.0513,
        -0.2045,  0.0883,  0.0153,  0.0950, -0.0116,  0.0961, -0.0042,  0.1595,
        -0.1136, -0.0076,  0.0444,  0.0635,  0.0209,  0.1101, -0.0166, -0.1680,
        -0.2443], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5486,  3.6016, -1.6811, -0.0378, -0.0079,  0.1637,  0.0395, -0.0381,
         0.0976,  0.1968,  0.1091,  0.1729,  0.0274, -0.1118, -0.2354, -1.9452,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6706,  6.4116, -0.4522,  1.2218, -0.4782, -0.4433,  0.1781, -0.1861,
         0.4874,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0268,  6.9952, -0.7766,  0.7417, -0.5475,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3923e+00, -4.2057e+00, -4.9329e-01,  7.8284e-01,  5.2735e-01,
         6.1244e-01,  3.3769e-01,  1.6401e-01,  1.6408e-01, -9.6282e-02,
        -1.0002e+00, -5.2770e-01,  1.7478e-03, -6.7064e-02, -1.6968e-01,
        -3.0645e-02, -8.3475e-02, -4.3285e-01, -3.7658e-01, -5.4184e-01,
        -1.7349e-01, -1.0657e-01,  9.2796e-02,  3.0559e-01, -2.2541e-01,
        -1.1663e-01, -1.2855e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7461, -1.7908, -0.5479, -0.5866,  0.0698, -1.6645,  0.7231,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9050, -6.4821, -0.1441,  0.0574,  0.0774, -0.0910,  0.0356, -0.0939,
         0.1915,  0.1603,  0.0143, -0.1251, -0.1045,  0.0283, -0.2731, -0.0463,
        -0.3281,  0.1975,  0.3053, -0.3514,  1.7279,  0.4759,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6582,  4.1709, -0.1091,  0.1336, -0.4235,  0.2478, -0.0632, -0.7126,
         0.0702, -0.1084,  0.2892,  0.1781, -0.1803, -0.2235, -0.0292, -0.1127,
        -0.1269,  0.1812, -0.2322,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3946e-01, -4.0478e+00,  3.1887e-01,  9.2932e-02,  2.4713e-03,
        -4.3995e-02, -4.9894e-02, -8.2410e-02, -2.0235e-01,  1.1614e-01,
         7.7693e-02, -1.5695e-02, -5.3730e-03, -2.1897e-01, -3.9364e-02,
         2.0822e-02, -1.0690e-01,  2.5002e-01, -4.4303e-02,  3.7633e-02,
         1.3571e-01, -1.9092e-01,  2.9307e-02,  4.9202e-02, -5.2081e-02,
         4.2306e-02,  2.5621e-01, -2.3385e-01,  1.6721e-01,  2.3869e-01,
         2.5862e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-9.6176e-01,  1.1143e+01, -3.7494e-01, -1.2608e+00, -2.9919e-01,
         4.5359e-01, -2.1131e-01, -7.8724e-02, -7.0932e-01, -3.4473e-01,
         2.0958e-01,  2.5372e-01, -5.6934e-01, -4.5789e-01,  9.0242e-01,
         5.1791e-01, -1.5892e+00,  2.5048e-01, -2.1130e-02,  5.2156e-01,
         8.0994e-03, -7.7151e-02,  3.7801e-02, -2.4975e-01, -1.4734e-01,
         2.1428e-01,  5.0964e-03, -1.9670e-01, -1.0448e-01,  4.2272e-01,
        -2.2920e-01, -1.1952e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0210, -2.0110, -0.8072, -0.3705, -0.1034, -0.2258, -0.4406, -0.1868,
        -0.1685, -0.0711, -0.0415,  0.0372,  0.2055, -0.6602,  0.3791, -0.1985,
         0.0948, -0.1125,  0.0038, -0.1538, -0.0049,  0.2116,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7218,  2.5029,  0.3967, -0.2949,  0.0426,  0.3902, -0.5851,  0.1711,
         0.0356, -0.1271, -0.0177,  0.0358,  1.1597,  0.0896,  0.1527, -0.1149,
        -0.1981,  0.1088,  0.1428, -0.2134,  0.1391, -0.0437,  0.0893,  0.0305,
        -0.0529, -0.1185, -0.0208,  0.2861,  0.2052, -0.5637,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0129, -6.9056, -1.5072, -0.5130,  0.2914,  0.1216, -0.0116, -0.0650,
        -0.1001,  0.2048, -0.0370, -0.3661,  0.1998, -0.2321, -0.0087,  0.2171,
        -0.5003, -0.4418,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2164, -2.8242,  1.0366,  0.6818,  1.3208, -0.2883, -0.5234, -0.5253,
         0.2306,  0.0841, -0.1102,  0.1350, -0.0881, -0.1150, -0.0338,  0.0120,
        -0.3479,  0.0436, -0.0684,  0.1225,  0.1288,  0.0133,  0.0465, -0.0263,
        -0.1085,  0.0203, -0.0892, -0.0244, -0.0979,  0.0449, -0.0661, -0.2042,
        -0.0752, -0.2162, -0.0796, -0.4414,  0.0711,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1840e+00,  7.2045e+00, -4.3505e-01,  1.8887e+00, -3.0589e-02,
        -1.5606e-01,  1.1988e-01, -4.2088e-01, -1.2332e-01,  4.4424e-02,
         1.3363e-01,  1.9307e-02, -8.4607e-02, -2.8417e-01, -8.3209e-02,
         9.1403e-04,  3.0788e-01, -8.0050e-02,  2.2426e-01, -1.8408e-01,
        -5.0462e-01,  1.8673e-01, -9.2357e-01,  2.7436e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8700e-01, -3.6367e+00,  4.3777e-01,  4.5963e-01, -2.4062e-01,
         2.5670e-01, -3.8363e-01, -5.9432e-01,  1.5347e-01, -5.0999e-03,
         3.4746e-03, -9.6336e-02,  3.7985e-03, -1.1071e-01, -2.7361e-02,
        -6.9312e-02, -2.6448e-02,  3.9349e-02,  1.1127e-01,  9.2482e-02,
         3.8087e-02,  5.1539e-02,  2.4462e-01, -1.6442e-01, -2.4196e-01,
        -4.4913e-01, -3.0251e-03, -2.0330e-02,  5.8417e-02, -5.2806e-02,
        -1.4701e-01, -2.7039e-02,  1.3812e-03, -5.6092e-02,  2.1304e-02,
         2.3400e-01,  4.9193e-02,  5.4084e-02, -3.7383e-02, -6.7829e-03,
         2.6830e-01,  2.9484e-02,  4.5055e-02, -1.7006e-01,  4.9329e-01,
        -4.7027e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4500, -2.5342,  0.0385, -0.1012,  0.3623, -0.1010, -0.0130, -0.0429,
        -0.0591,  0.0302,  0.0457, -0.0587, -0.1830,  0.0221, -0.0464, -0.0628,
         0.1131, -0.1194, -0.0930,  0.0298,  0.0518,  0.5191,  0.1116,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6212, -7.6061, -0.5592, -0.0681,  0.4568,  0.1170, -0.4122,  0.1805,
         0.0625,  0.1100, -0.1928,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8545,  3.1458,  0.8497,  0.0505, -0.1207,  0.5424,  0.3320, -0.0052,
         0.0578, -0.1985,  0.1468,  0.1025, -0.1432,  0.1345,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8323,  5.5913,  0.6510, -2.5159,  0.9403, -0.3116,  0.6567, -0.6653,
        -0.0845, -1.1677,  1.3065,  1.2438,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0543e+00, -6.7142e+00,  5.2875e-01,  1.9429e+00,  2.8537e-01,
        -5.5191e-01,  9.3554e-02, -4.4955e-02, -1.5578e-01,  1.7410e-01,
        -3.9210e-02,  3.6597e-01, -7.1579e-02, -7.0022e-02,  1.2641e-01,
         1.3786e-02,  2.6701e-01, -1.9902e-01,  2.5561e-02,  2.4175e-01,
        -4.3570e-02,  2.9830e-02,  8.6317e-02, -3.6449e-01, -3.7952e-02,
        -3.7015e-01, -4.2997e-02,  1.2723e-01, -4.1383e-02, -1.2796e-01,
        -3.8253e-02,  3.7301e-01, -1.4969e-01,  1.6084e-02, -9.1229e-02,
         9.1947e-02,  5.7268e-03, -1.2662e-01,  4.3243e-02, -1.1194e-01,
        -6.7685e-02,  4.1925e-02, -6.5000e-02,  6.9667e-02,  1.3917e-01,
         4.8754e-02, -6.1728e-03,  8.5433e-02, -2.9448e-02, -1.3259e-01,
        -5.5258e-02,  1.8456e-01, -8.9276e-02, -3.6760e-01, -3.7506e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-8.3136e+00,  1.0361e+01,  1.5175e+00, -1.0169e+00, -4.4075e+00,
        -6.2614e-01,  1.8574e+00,  3.3916e-01, -1.9721e-01,  2.9786e-03,
         2.4501e-01, -1.2086e-01,  1.3583e-01, -6.4620e-01,  3.9583e-01,
        -2.4801e-01, -5.1816e-01, -2.7532e-01, -7.9701e-02,  2.2201e-03,
        -6.9054e-02,  7.8373e-01,  1.2335e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3529e-01,  3.2144e+00,  6.2303e-01,  1.1213e-01, -1.1255e-01,
         8.8344e-02,  1.7313e-04,  9.9828e-02, -2.9075e-01, -1.8954e-02,
        -2.0646e-01,  1.4479e-01, -1.5477e-02,  1.1689e-01, -9.2478e-02,
        -9.5466e-02, -5.5907e-01, -4.1364e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5924,  3.7679, -0.3653, -0.9012,  0.0717,  0.2697,  0.7198,  0.4111,
        -0.1029, -0.3187,  0.0777, -0.0112,  0.7601, -0.2056, -0.1251,  0.0309,
        -0.5548,  0.1102,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3209,  4.6405,  0.2669, -0.3176,  0.1603,  0.4852, -0.4466, -0.8310,
        -0.2449, -0.1022,  0.0973,  0.5972,  0.0444,  0.8591,  0.7891,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3361,  7.0047,  0.6638,  0.3175,  0.5418, -0.2000,  0.0174,  0.2211,
         0.1308, -0.1044,  0.5262,  0.0580,  0.2629, -0.0648,  0.5636, -0.6181,
        -0.0707,  0.3560,  0.2012,  0.0814,  0.0083, -0.1257, -0.0704, -0.1862,
        -0.1529, -0.2539,  0.6077,  0.1782,  0.1705, -0.0844,  0.1208,  0.3406,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9273e-01, -2.4639e+00,  6.7068e-02, -1.7738e-01,  3.3939e-01,
        -1.6329e-01, -2.6972e-02,  7.7925e-02,  1.5045e-01,  2.3559e-02,
         8.2161e-02, -7.0512e-02,  9.5071e-02, -9.2593e-02, -7.1124e-02,
         1.2242e-01, -9.9543e-02,  7.4320e-02, -2.0397e-01,  5.9572e-02,
         6.3087e-02, -1.7040e-01,  2.0747e-01, -8.4617e-02,  3.6395e-02,
         1.8905e-02, -5.5793e-04, -6.1642e-02,  1.1962e-01,  2.4502e-01,
         5.0905e-02,  1.4705e-02, -2.9815e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1475e-01,  5.2141e+00, -3.5575e-01,  5.7997e-01,  2.1095e-02,
        -1.2245e-01,  2.6037e-02, -1.5870e-01, -9.4481e-02, -1.2723e-01,
        -5.4737e-02, -9.9173e-02,  3.8866e-03, -9.2004e-02, -3.1257e-01,
         1.9261e-01,  1.2300e-01, -2.6579e-02, -3.3898e-03,  8.0437e-02,
        -3.9384e-02,  6.6515e-02, -3.5503e-02,  8.2469e-02, -1.1465e-01,
        -2.3360e-02,  7.4366e-02, -9.4817e-02,  3.3888e-02, -9.8738e-03,
         1.4669e-02,  3.8880e-02, -5.7091e-02, -6.2297e-02,  1.9795e-02,
        -3.2179e-02,  4.1255e-02, -1.0314e-02,  3.0769e-02, -1.2545e-02,
        -7.1671e-03, -3.1931e-02, -2.1622e-02, -5.9336e-02, -1.6093e-02,
         3.4618e-02,  1.2975e-02, -5.0233e-02, -5.8068e-02, -2.4037e-02,
         8.4325e-03,  3.5975e-02, -1.1748e-01,  2.2720e-02, -4.5609e-02,
        -2.3899e-02, -1.8340e-01,  2.2795e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1234, -5.0268,  0.1258,  0.6519,  0.1353,  0.1903,  0.0624, -0.1135,
        -0.0766, -0.0424,  0.0502, -0.0461,  0.1970,  0.1784,  0.1871, -0.0499,
         0.0587,  0.1829, -0.0557,  0.0613,  0.0166, -0.1168,  0.2652,  0.0400,
         0.4530,  1.3150,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2167, -3.5460, -0.3220, -0.0834,  0.0691, -0.0460,  0.1570, -0.0534,
         0.0365,  0.1581,  0.4814,  0.0512,  0.0145, -0.0715,  0.0826, -0.0216,
         0.0830,  0.0470, -0.0244, -0.0075,  0.0328,  0.0776,  0.0850, -0.2137,
         0.2387,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6143e-01, -4.6634e+00,  2.6659e-03, -3.9438e-02,  1.0593e+00,
         2.1353e-01,  3.7001e-02,  5.1790e-01,  3.8015e-01, -7.0958e-02,
         3.1316e-01,  3.7536e-01, -1.4150e-02, -6.0025e-01, -3.4870e-01,
        -3.7665e-01, -3.4836e-01, -2.7589e-01, -7.0303e-02, -2.0555e-01,
        -2.7593e-02, -2.1213e-01, -8.6115e-03, -1.5198e-01, -5.4707e-01,
         1.0312e-01, -6.3739e-02, -7.8751e-02, -5.5746e-03,  5.6516e-02,
        -1.7879e-01,  3.9706e-02,  1.5775e-01, -3.3140e-02,  5.3778e-02,
        -1.0060e-01, -3.3904e-01, -8.1564e-02, -3.5636e-02, -1.3550e-01,
        -7.6919e-02,  7.0328e-03, -3.8811e-02,  5.0304e-01,  2.7010e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3296, -3.9809, -0.6886, -0.7894, -0.0579, -0.1025, -0.4621, -0.3080,
         0.1677,  0.1090, -0.1327, -0.0930, -0.1166, -0.1205, -0.1687,  0.1770,
        -0.2058, -0.5219, -0.0399,  0.1103,  0.0566,  0.3374,  0.8351,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6558,  3.1820, -0.5575,  0.0438,  0.0355,  0.3120, -0.3687, -0.0463,
         0.0870,  0.2188,  0.1691,  0.1383,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 6.1240, -7.5530, -2.5680, -0.3111, -0.5757,  0.5423, -2.1855,  0.1183,
         0.6893, -0.0839, -0.3464, -0.1213,  0.2165,  0.0637, -0.1209, -0.5446,
         0.2725, -1.0433,  0.4160, -0.0562,  0.2315, -0.8779, -0.8170,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0669, -3.5498, -0.5919, -0.3598, -0.6874,  0.0345, -0.2123,  0.1976,
        -0.0673, -0.0305,  0.1453, -0.0714,  0.0450, -0.0254,  0.4109, -0.0111,
        -0.0278, -0.1101,  0.0626,  0.0363,  0.0215,  0.0106, -0.1826,  0.1251,
         0.0969,  0.0729,  0.0376, -0.0206,  0.0515,  0.0523,  0.0167, -0.3965,
        -0.2871], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9497,  3.3633,  0.0995,  0.1766,  0.1246, -0.2307,  0.2628,  0.2465,
         0.0510, -0.0736,  0.1596,  0.0626, -0.1662,  0.4004,  0.0061, -0.0594,
        -0.3254,  0.1600,  0.1857,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5386, -8.2703,  0.5605,  0.2050, -0.3983, -0.7484,  0.4886,  0.1406,
         0.2779, -0.6447, -0.1859, -0.2295,  0.1744, -0.5895,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4963,  6.7695,  0.3671, -0.0590,  0.2763, -0.2275,  0.0778,  0.2177,
         0.0700, -0.0488,  0.4492,  0.1009, -0.0980,  0.3310, -0.0367, -0.1301,
         0.0849,  0.0791,  0.0650, -0.0756,  0.1778, -0.2764,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7968,  6.3619,  0.9042, -0.6014,  0.5040,  0.3016,  0.1801, -0.4932,
        -0.1938, -0.0553,  0.2637, -0.1915, -0.4400, -0.0779, -0.1044, -0.1758,
        -0.2371, -0.2337,  0.2300, -0.1076, -0.2377, -0.0182, -0.0759,  0.1416,
        -0.0862,  0.0677,  0.0067,  0.1736, -0.4023, -0.0567,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1926, -3.3401, -3.2832, -2.1654, -2.7166, -0.9987,  2.1416,  0.0239,
         0.4007,  0.1449, -0.3735, -0.2314, -0.3889,  0.0518,  0.7024, -1.6926,
         1.0262,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3445,  4.7037, -0.2339,  0.9658, -0.0458, -0.0158,  0.2836,  0.5604,
        -0.7098, -0.0880, -0.2245,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6078,  4.7108,  0.7216, -0.1051, -0.3410, -0.4892,  0.3128, -0.0415,
        -0.0160,  0.2471,  0.4245,  0.1258,  0.0452,  0.7856, -0.3192, -0.1469,
         0.4262,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4698e-02,  1.9419e+00,  2.7540e-01,  4.9817e-03, -4.5632e-02,
         7.0048e-02,  6.4805e-02, -1.4612e-02,  7.7531e-02,  9.9643e-02,
        -2.1253e-02, -1.2317e-02, -8.5217e-04,  1.7705e-01, -1.4521e-02,
         2.8228e-02,  9.0927e-02, -1.5437e-03,  1.8153e-01,  2.1223e-02,
        -1.4139e-02, -9.1617e-02, -9.7032e-02, -5.3290e-02, -9.4541e-02,
        -4.6247e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1939e+00,  8.5864e+00,  2.3174e-01, -5.2375e-02, -2.4067e-02,
        -2.2651e-02, -2.0310e-01,  6.1254e-01,  5.1887e-01,  3.8815e-03,
        -4.8562e-01, -9.3705e-02,  9.3722e-02,  1.5088e-01, -5.0998e-02,
        -1.5660e-01, -1.3881e-01, -2.2127e-01, -3.6070e-02, -1.1272e-01,
        -5.3293e-01, -7.0086e-02,  4.4207e-02,  2.4039e-01, -1.0805e-01,
        -1.7289e-02, -5.1657e-02, -1.2472e-01, -9.0431e-01, -4.4443e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0910, -2.5137, -0.2847,  0.1411,  0.2918,  0.0342, -0.1209, -0.1163,
        -0.1522,  0.0113, -0.0249,  0.1311,  0.0604, -0.1138, -0.2953, -0.1443,
         0.1562, -0.1398, -0.4319,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 8.0766e-01,  3.7281e+00,  7.4561e-02,  2.0244e-01,  3.6366e-01,
         1.3138e-03,  3.3142e-01, -5.5360e-03, -2.5600e-02,  6.9374e-02,
         2.5515e-02, -1.1879e-02,  4.3051e-02,  8.0672e-02, -1.2935e-01,
        -1.4515e-02,  7.2193e-02,  2.2088e-02, -4.4698e-02,  1.1729e-02,
        -8.4732e-02, -2.9935e-02, -9.4315e-02,  7.6966e-02, -1.8037e-02,
        -5.5455e-01, -5.7293e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3099, -1.6538, -0.0572, -0.0607, -0.0050, -0.1243, -0.2971,  0.0400,
        -0.7167,  0.3475,  0.6320, -0.2784,  0.5282,  0.3845,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4760,  7.3194,  0.7513,  0.2639,  0.1344,  0.0733, -0.2863,  0.2506,
        -0.7502,  0.2748,  0.0693, -0.6814,  0.2336, -0.0346,  0.2102, -0.4364,
         0.2249,  0.0530,  0.5014,  0.2091, -0.0933,  0.2660, -0.2264, -0.1995,
         0.4593, -1.0712,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2064, -3.7037,  0.5758,  0.1055,  0.2647, -0.3727, -0.0097,  0.0402,
         0.0453, -0.5633,  0.1809,  0.5251,  0.0105,  0.1858,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4723e+00,  6.6668e+00,  9.3604e-01,  9.8841e-02,  3.3509e-01,
        -1.4714e-01,  2.8581e-01, -4.2015e-02, -9.9257e-02, -8.2074e-03,
         3.3360e-02, -3.7719e-01,  2.6394e-02, -7.9136e-01,  2.0681e-02,
        -5.3410e-03, -5.2470e-01,  1.5971e-02, -9.6876e-02, -4.6083e-02,
        -2.2133e-01, -4.0849e-01, -3.0480e-01, -3.7808e-02, -7.5530e-02,
        -1.2515e-01, -5.1359e-02, -1.0386e-01,  3.9426e-02, -6.5304e-02,
        -4.2460e-01,  1.3102e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6046e-01,  1.0328e+01,  1.5897e+00,  3.2053e-02, -4.9401e-01,
         1.8570e-02, -5.8252e-01, -1.0215e-01,  5.1834e-01, -2.3273e-01,
         1.7412e-02, -8.1012e-02,  9.2025e-02, -4.4601e-02,  3.1927e-01,
         2.9251e-03, -4.1675e-01, -1.6253e-01, -1.7002e-01, -1.2514e-01,
         6.2174e-02, -8.4553e-02,  1.4816e-01,  2.5782e+00, -1.2915e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0785, -2.6253, -0.7504,  0.2840,  0.0721,  0.5557,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4235e-01,  5.6398e+00, -1.4359e-02, -3.5786e-01, -5.1077e-01,
         9.9715e-02, -2.3401e-02,  3.4248e-01,  2.7002e-02, -2.7125e-01,
         1.1623e-01,  2.5442e-01,  8.7195e-02,  1.1508e-01, -1.4935e-01,
         2.0230e-01,  4.1012e-02,  1.6001e-01, -4.0059e-02, -8.4660e-02,
        -7.5568e-02,  2.3364e-02, -6.2781e-02, -1.5537e-01, -1.6434e-01,
        -1.2506e-01,  1.9673e-02, -1.6433e-01,  1.0705e-01,  4.9043e-02,
         1.8009e-01,  1.4998e-01,  1.6184e-01, -1.3056e-02, -7.1850e-02,
        -2.5799e-03,  3.7940e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7447e-03, -7.9627e+00,  6.2756e-01, -1.1179e+00,  2.6192e-01,
        -1.4926e-01, -7.0224e-01,  4.9542e-01, -2.1666e-01, -1.3085e-01,
         6.3670e-02,  4.0853e-02,  1.2228e-01,  3.1370e-01,  1.6788e-01,
         7.7162e-02,  2.8436e-01,  8.6821e-02,  2.0220e-01,  9.7387e-02,
         4.2399e-01,  9.3818e-02,  1.5550e-01, -9.5988e-02,  1.3897e-01,
         2.2097e-01, -2.0779e-01,  7.7343e-02,  2.9040e-02, -1.0716e-01,
         1.0394e-01,  8.0070e-03, -2.3587e-05,  5.8832e-01,  8.6055e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3230, -3.3091,  1.7394,  0.0408, -0.2193, -0.1404, -0.2248, -0.0798,
         0.1885, -0.3714, -0.1024, -0.1829,  0.3949, -0.7135,  0.4228,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0616e-01, -2.7605e+00, -1.8909e-01,  6.4346e-01, -8.7098e-02,
         7.4393e-02, -5.7654e-02, -7.2560e-03, -2.0276e-01,  3.8834e-02,
        -8.2888e-02,  7.4147e-03,  6.8757e-02,  2.9352e-02,  1.5473e-02,
        -7.9535e-02,  7.0679e-02, -1.4292e-01, -1.2123e-02, -9.4887e-02,
         1.3945e-02, -5.6114e-03, -1.2768e-01, -2.0266e-02, -6.9206e-02,
        -1.4849e-01, -2.6596e-02,  1.1976e-02, -5.8106e-02,  3.1687e-02,
        -1.2390e-01,  3.2884e-01, -1.3024e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3000e-01,  9.4591e+00,  6.8723e-01,  1.1344e-02, -1.4461e-01,
        -8.7020e-01, -1.1996e-02,  1.0239e-02,  4.5684e-03, -1.2543e-01,
         5.0139e-02, -1.1203e-02, -1.7908e-01, -4.0712e-01, -4.3343e-01,
        -2.8968e-02,  4.5722e-02, -1.3288e-02, -1.6443e-01, -1.3553e-01,
        -8.5362e-02,  1.3417e-02, -3.8521e-02, -8.4780e-02, -6.6705e-02,
         7.7769e-02, -3.7324e-01, -4.7839e-01, -3.0273e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.7197, -2.9603,  0.3949,  0.4469,  0.4857,  0.1462, -0.0655, -0.1750,
        -0.2564,  0.3157, -0.0647, -0.1818,  0.2055, -0.0339,  0.4050,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7251e-01,  3.5218e+00,  2.8168e-01, -2.1200e-01, -9.5148e-02,
        -1.4692e-01,  1.4901e-01, -6.0675e-02,  3.9776e-01, -2.3924e-01,
        -1.1514e-01, -5.6789e-02, -1.2596e-01, -1.3654e-01, -1.3892e-02,
        -3.3915e-02, -1.4760e-01, -1.4044e-02, -6.8424e-03,  1.6410e-02,
        -9.5755e-03, -3.7775e-02,  1.6206e-02, -9.4816e-03, -2.4689e-02,
         3.4138e-02, -6.0391e-02, -9.5759e-02,  2.2680e-02,  2.0290e-02,
         8.2343e-03,  4.7834e-02, -2.2098e-03, -1.9296e-02,  1.9843e-02,
         5.2466e-02,  2.4464e-02, -1.3203e-02,  5.2893e-02, -2.2491e-02,
         2.3451e-02,  4.0928e-02,  1.5980e-01,  2.4647e-02, -4.8278e-02,
         7.2206e-02,  1.7547e-03, -1.3847e-01,  3.7996e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7112, -6.8594,  0.7571, -0.1094, -0.0164, -0.3470, -0.2119,  0.1649,
         0.1967, -0.3263,  0.1940, -0.0358,  0.0320,  0.0599,  0.0937,  0.0820,
        -0.2099, -0.0224, -0.0610, -0.2105, -0.2271, -0.0998,  0.0678,  0.1011,
         0.0642,  0.0608,  0.2797, -0.3358,  0.0136,  0.1527,  0.0922, -0.0085,
        -0.0619, -0.3634,  0.0589,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3679e-02, -4.4775e+00,  3.6706e-01,  1.0104e+00,  2.2705e-01,
        -2.6409e-01,  8.2782e-02, -2.9639e-02,  7.7381e-04, -8.3504e-02,
        -6.4690e-02,  8.0298e-03,  3.3238e-02,  1.3388e-02,  1.5739e-02,
        -2.8505e-02, -4.4409e-02, -1.0425e+00,  7.8928e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1640,  3.0252,  0.6236,  0.0864,  0.2373,  0.0926, -0.0511,  0.0259,
        -0.2020, -0.2275, -0.2183,  0.1853, -0.1231, -0.9624, -0.0271,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4892,  3.6432,  0.1707,  0.2878,  0.0538, -0.0700, -0.2187, -0.1222,
         0.0413, -0.1328, -0.2349,  0.0143, -0.0902, -0.1033, -0.0305, -0.2102,
         0.0038, -0.1088,  0.1811,  0.0214, -0.2240, -0.1556,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8082,  4.8236,  0.6482,  0.3138, -0.1142,  0.0448,  0.2813,  0.2681,
        -0.1910, -0.3055, -0.1586,  0.4684,  0.2239,  0.1103, -0.2118, -0.0177,
         0.0682,  0.1179,  0.1016, -0.0208, -0.0413,  0.0693,  0.1034,  0.0660,
        -0.1397,  0.1880, -0.1473,  0.0553, -0.7596, -0.4194,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2682e-01,  3.1564e+00, -7.2788e-01, -1.0809e-01, -4.7787e-01,
        -3.0995e-03, -4.3260e-01, -2.4978e-01,  1.9751e-01, -4.8045e-01,
         2.2802e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8353, 10.1174, -0.0515, -0.0564, -0.2640, -0.2996,  0.1973, -0.0110,
         0.1927,  0.1470, -0.1354,  0.2513,  0.0455,  0.0281,  0.1132,  0.0764,
         0.1066,  0.0564,  0.0429, -0.1185,  0.0794,  0.6630,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8351e-01, -7.3660e+00, -4.1360e-01, -2.3163e-01, -6.0498e-01,
        -2.4255e-01, -1.0279e-01,  3.0512e-01, -1.1092e-01,  9.1364e-03,
         6.2148e-02,  3.5498e-01,  4.5031e-02,  6.0402e-02,  2.2641e-01,
         3.0071e-01, -3.1704e-02,  1.6357e-02,  1.4462e-01,  2.9060e-03,
         9.0009e-02, -6.1240e-02, -1.1175e-01,  1.1663e-01,  9.3980e-02,
         2.4084e-01,  3.5196e-02,  1.7058e-01,  1.4157e-01,  1.4467e-01,
         4.9991e-01, -1.8615e-01, -2.7459e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.2922, -11.9298,  -2.8448,   1.5942,   1.2550,  -0.5884,  -0.6682,
          0.3288,  -0.3862,   0.1907,  -0.2536,  -0.2355,   0.6261,   0.4382,
          0.8761,  -1.5896,   0.1438,   0.0558,   0.1015,  -0.1574,   0.1851,
          0.2790,  -0.0153,   0.1311,  -0.0581,  -0.2732,   1.0649,   1.2497,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0315,  3.6073,  0.5876,  0.0415,  0.3471, -0.5913, -0.1253,  0.3041,
         0.6147, -0.1252, -0.0932, -0.0340, -0.1720,  0.1970,  0.1717,  0.1565,
        -0.0314, -0.4109,  0.1355, -0.1495, -0.2190,  0.0563,  0.3186,  0.3313,
         0.0885,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.8123e-01, -5.2784e+00,  6.2122e-01,  1.8520e-01,  4.7791e-01,
        -1.8770e-02,  1.2246e-01,  1.8481e-01,  1.4486e-01,  2.3524e-02,
        -1.5646e-01,  2.4040e-01,  1.6076e-02,  3.4841e-02, -1.6025e-02,
         2.5006e-01, -1.2865e-01, -9.4636e-02, -8.7244e-02, -1.8647e-01,
         1.4949e-01,  4.8366e-02, -2.6075e-01, -4.0707e-02,  1.0631e-01,
        -4.6633e-02,  7.3647e-02, -1.5778e-01,  8.3422e-02,  7.6647e-02,
         8.9514e-02,  2.0137e-02,  4.5645e-03, -1.3143e-02, -6.2533e-02,
         9.7991e-02,  6.7762e-02,  7.6102e-02, -7.3945e-02, -1.0328e-01,
        -7.0742e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3398,  6.4834, -0.4165, -0.7318,  0.7437,  0.3405,  0.1457, -0.4253,
        -0.5541, -0.0180,  0.1867, -0.0592, -0.3981,  0.0813,  0.3341, -0.0478,
        -0.0345, -0.0358, -0.1595, -0.5036, -0.1940, -0.0851, -0.0970, -0.5066,
        -0.2448,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2193e-01,  4.9370e+00,  4.4294e-01, -1.1206e-03, -2.3724e-01,
         9.3536e-02,  3.8947e-01,  2.6174e-01,  1.7672e-01, -6.1836e-01,
        -3.5482e-01, -2.0110e-02, -1.6640e-02, -6.5002e-04, -9.5357e-02,
         7.2887e-02,  3.5464e-02, -2.3455e-01, -4.3494e-01,  4.4180e-02,
        -4.8178e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1821e-02,  2.5034e+00,  1.0074e-03,  9.4109e-02, -2.8483e-01,
        -5.7869e-03,  1.7962e-01, -1.0234e-01,  6.9127e-02,  3.8961e-03,
        -1.7663e-01,  3.9490e-02, -3.0105e-02, -6.7603e-03, -7.6940e-02,
        -7.6011e-02, -7.8224e-01, -3.3543e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5914, -5.9639, -0.0997, -0.4690,  0.1891,  0.4484,  0.0719,  0.2924,
        -0.0204,  0.1240, -0.2435,  0.1249, -0.0643, -0.2311, -0.1526, -0.1355,
         0.1209,  0.4366, -0.1834, -0.5420, -0.1533,  0.1493,  0.0505,  0.0554,
         0.0717, -0.0904, -0.0864,  0.1052, -0.0813,  0.1057, -0.0815, -0.0897,
        -0.0541, -0.1579,  0.2041, -0.2541, -0.3486, -0.3510,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5997,  2.9780,  0.5863, -0.3942,  0.1119, -0.0097, -0.0670, -0.0609,
         0.0793, -0.2375,  0.1326,  0.0068,  0.0935,  0.1158, -0.0812,  0.0825,
         0.1630, -0.2546,  0.1437, -0.0442,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0284, -3.0359,  0.4179, -0.0944, -0.0186, -0.0726, -0.0490, -0.1679,
        -0.0841,  0.0529,  0.2063,  0.1634, -0.0227, -0.3481,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5292, -1.3705, -1.2730, -0.3478, -0.9835,  0.4905, -0.7565,  0.6254,
         0.9696, -0.4991,  0.1129, -0.0302, -0.2004, -0.1267,  0.3653,  0.1786,
        -0.1238, -2.1662,  0.7918, -0.6084,  0.4880, -0.0305,  0.2400, -0.5154,
         0.1336, -0.2183, -0.0990, -0.2686,  0.1620,  0.1724,  0.9075,  0.6386,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3575, -6.5602,  0.5608, -0.3646, -0.0073,  0.2332, -0.6493, -0.1790,
        -0.0180,  0.0436,  0.2599,  0.0152, -0.0603,  0.0501, -0.4109,  0.1731,
        -0.0900, -0.1244, -0.5511,  0.1239,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3948, -5.9949, -0.7988,  0.1674,  0.6546, -0.0655,  0.0136,  0.0772,
         0.2273,  0.0992,  0.2525,  0.0704, -0.0798,  0.0183, -0.0101,  0.1829,
         0.0400, -0.0383, -0.0601, -0.0400,  0.0667, -0.3054, -0.1289,  0.0627,
         0.0692, -0.1885, -0.0290,  0.2898, -0.2776,  0.0278, -0.1432,  0.1972,
        -0.1068, -0.0320, -0.0295, -0.0688, -0.0261, -0.0377, -0.0401, -0.0225,
        -0.1249,  0.2042, -0.1194, -0.1408, -0.1970,  0.4778,  0.1857,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0894,  6.3552, -0.5001,  0.0439, -0.0341,  0.2541,  0.0214, -0.1751,
         0.3462,  0.1046, -0.1733, -0.0298, -0.0118, -0.3552, -0.5002,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7596e+00,  6.7437e+00,  1.8976e+00,  1.1623e-02,  4.3357e-02,
        -1.0101e-01,  3.2581e-01, -1.1244e-01,  6.3213e-01, -4.4447e-02,
         2.2638e-01, -1.7703e-02, -1.0495e-01,  2.7847e-01,  6.2527e-02,
         1.6344e-01, -1.1871e-01, -1.0634e-01, -3.7644e-01,  2.0126e-01,
         2.5775e-01,  2.1582e-01, -2.3672e-01, -2.9814e-01,  1.2394e-01,
        -4.4706e-02,  4.5758e-01,  1.9268e-01, -4.4106e-02,  4.9647e-02,
        -1.8691e-01,  7.2504e-02,  3.2238e-01,  3.2655e-02,  5.5856e-02,
        -1.1564e-01,  3.1091e-02,  4.6117e-02, -9.9390e-02,  1.6378e-02,
         2.1651e-02,  1.9393e-02, -1.1934e-01, -1.2478e-01,  5.1671e-02,
         4.6151e-03, -2.4390e-01,  9.7998e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.9151, 13.8198,  0.5575,  0.4093,  0.1472,  0.4506, -0.1598,  0.4485,
         0.6341, -0.0871,  0.3061,  1.0526,  0.3287,  0.1290, -0.0783,  0.1028,
         0.9112, -0.2536, -0.2001,  0.3402,  0.3441, -0.0575,  0.2964,  0.1274,
         0.4925,  1.5321,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1135,  3.4048,  0.0822, -0.3314, -0.0868, -0.0642,  0.1342, -0.0935,
        -0.0709, -0.0131,  0.0343, -0.1693, -0.1400, -0.0570, -0.0237, -0.0476,
        -0.1676,  0.0472, -0.5063, -0.1601, -0.1521,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0881e+00,  5.8370e+00,  3.2540e-01, -7.1666e-01, -9.1356e-02,
         4.2982e-01,  4.1351e-01, -1.3414e-01,  4.2634e-03,  6.1941e-02,
        -1.6566e-01, -7.7449e-02, -1.0635e-01,  6.7866e-02,  3.5784e-02,
        -3.6649e-02, -1.6338e-01, -1.6336e-01,  1.2310e-01, -1.5639e-02,
         2.5664e-02, -1.3843e-01, -7.2051e-02, -1.0127e-01, -2.0323e-01,
         9.1459e-02, -1.1577e-01, -1.8397e-01, -1.6846e-01, -1.0592e-02,
        -7.5176e-02, -1.5078e-01, -5.2368e-02, -2.5654e-01, -7.3902e-01,
        -1.1823e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4247,  2.9822, -0.2141,  0.0475,  0.4740,  0.0963, -0.0560, -0.2298,
         0.2317,  0.1989, -0.2742,  0.1252,  0.3858,  0.1186, -0.0031,  0.0872,
        -0.0160, -0.1403,  0.9071, -0.8772,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4944e-01, -4.9220e+00, -4.8249e-01, -3.0540e-01, -4.2342e-01,
         8.7967e-02, -1.5616e-01,  3.1071e-01,  6.6958e-01,  8.0377e-04,
        -3.2318e-01, -6.1835e-02,  9.9362e-03, -1.4671e-01,  3.7882e-02,
        -1.0985e-02,  1.3518e-01, -1.0576e-01,  1.8205e-01,  5.4201e-01,
        -1.3709e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4047,  9.2354,  2.3096, -0.2759,  0.9415, -0.2500,  0.2139, -0.8735,
         0.3478,  0.0358,  0.0582,  0.6610, -0.2347,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5549e-01,  7.2485e+00,  1.2471e+00,  2.8514e-01, -7.7948e-03,
        -3.6915e-01, -7.6906e-01, -4.4537e-02,  7.4473e-02,  1.6137e-01,
         2.7052e-01, -1.0323e-01, -2.8279e-02, -3.0410e-02,  2.0089e-01,
         4.1436e-01, -2.2591e-01, -3.1895e-01, -1.4716e-01,  1.4841e-01,
         4.4559e-03,  1.8816e-01,  1.5637e-01,  2.1821e-01,  1.0868e-01,
        -5.8989e-02, -1.0965e-01, -9.2759e-03,  9.8817e-02, -2.9842e-01,
         5.2255e-02,  6.4228e-02, -6.8087e-02, -1.4778e-02,  6.5459e-02,
        -5.8012e-01,  1.5222e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5821,  7.0254,  0.4461, -0.8609, -1.6377, -0.2505, -0.0926, -0.3057,
         0.0644, -0.2036, -0.0347, -0.1606,  0.5923, -0.1435, -0.1643, -0.1939,
         0.0099,  0.2636,  0.1344, -0.2180, -0.3672,  0.0980, -0.2498,  0.5978,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.4632, -15.7510,  -0.7673,  -1.0169,   0.9988,   0.0836,   1.5301,
         -1.1810,   0.0917,   0.3478,  -0.5899,  -0.2330,  -1.1701,  -0.0445,
          0.5420,   0.2365,  -0.1080,   0.1050,   0.0621,  -0.7659,   0.4200,
          1.1167,   0.3147,  -0.0920,   0.3071,   0.1506,   0.2674,  -0.3166,
         -0.1190,  -0.2221,   0.8048,  -1.0873,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6200, -7.2485, -1.1617,  0.0392,  0.1239,  0.8313, -0.2472,  0.0855,
        -0.1638, -0.2412,  0.1551,  0.0564,  0.1311,  0.0114,  0.1265,  0.1640,
         0.2288,  0.0862,  0.0492,  0.1326, -0.0950,  0.0857,  0.1377, -0.1650,
        -0.1146,  0.2166,  0.0904,  0.0186,  0.0537,  0.2268,  0.0098,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7628,  5.4494,  1.6076,  0.0545, -0.1284, -0.7208, -0.4576,  0.0356,
         0.0164, -0.2489,  0.3626,  0.0867,  0.6273, -0.2286, -0.0796, -0.1488,
        -0.0338,  0.0173, -0.3285, -0.2992, -0.7818, -0.3365, -0.4909,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8546e-01, -5.0385e+00, -2.6993e-01, -8.0559e-01, -2.2733e-01,
        -6.9807e-02, -3.0230e-03, -3.8057e-01, -8.3557e-02, -3.1921e-02,
         2.1827e-01, -8.4445e-02,  1.1544e-02,  3.2280e-01,  6.3384e-02,
        -2.2168e-01, -5.5521e-02,  6.1926e-02, -1.4918e-01,  1.2697e-01,
         1.7544e-01,  4.9111e-02,  9.5566e-02,  8.7922e-02,  7.2759e-02,
        -6.9377e-02,  2.0498e-01, -1.3613e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.7615e-04,  1.0408e+01,  1.2526e+00,  4.4077e-02,  2.2531e-01,
         8.1823e-02, -4.4282e-02, -2.6203e-01, -2.6857e-01,  1.2887e-01,
        -4.2518e-01, -1.0346e-01, -1.3593e-01, -1.4801e-01, -2.3649e-01,
         1.0932e-01, -2.9375e-02, -3.0013e-01,  1.2885e-02, -3.7647e-01,
        -1.4013e-01,  1.5882e-01,  1.9657e-01, -5.4500e-01, -1.3177e-01,
         9.1733e-02, -3.0113e-01,  4.0175e-01,  4.8076e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1032e-01,  4.7610e+00,  6.8468e-02, -4.9841e-02, -3.9210e-01,
         1.2087e-01, -2.8544e-03, -2.4452e-01, -5.4341e-01, -4.4846e-02,
        -1.5135e-01, -5.7665e-01, -4.5472e-02, -8.7979e-02,  2.6912e-01,
         3.1659e-02,  1.2036e-01,  2.1114e-01, -1.2733e-02,  1.7105e-01,
        -5.6457e-02,  2.4579e-01,  3.2578e-02,  1.7027e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8222e-01, -7.6238e+00, -7.3622e-02,  1.5227e-01, -4.6699e-01,
         7.1933e-01,  1.4087e-02, -8.7543e-03,  3.3080e-02,  2.2427e-01,
         4.7389e-01, -5.2907e-03,  1.1329e-01, -3.8996e-02,  1.0454e-02,
         9.8538e-02,  1.4850e-01,  1.6108e-01, -3.0975e-01,  6.3659e-02,
        -3.1914e-01, -1.3362e-01, -4.9499e-02, -7.2656e-02,  1.1239e-02,
        -7.2673e-02, -6.2922e-02, -2.8361e-02, -1.2077e-01, -2.7646e-01,
         1.1582e-01, -7.3846e-01,  4.8733e-01,  1.5146e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4103,  3.6650,  0.0101, -0.1593, -0.1210, -0.1416,  0.1784,  0.1389,
         0.1311,  0.2544, -0.0730,  0.1087,  0.0273, -0.0814,  0.0221,  0.0190,
         0.0399,  0.0404, -0.1275, -0.0433,  0.0489, -0.3476,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9904, -3.1009, -0.1007,  0.2304,  0.1916, -0.6348, -0.1023,  0.0493,
        -0.0843,  0.0524, -0.2780, -0.1146, -0.1146, -0.0483,  0.1074,  0.4161,
        -0.5007,  0.6595,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3215, -1.7098, -0.1767, -0.1316, -0.0933, -0.2453, -0.0801,  0.0677,
        -0.1375, -0.0218, -0.1710, -0.0426, -0.0044, -0.1041, -0.0119, -0.1471,
        -0.0452,  0.0144,  0.0230,  0.0528, -0.1988,  0.1692, -0.2234,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0288,  7.0335, -0.6859,  1.1064, -0.5419, -0.0213,  0.1834,  0.0138,
        -0.0247,  0.0485,  0.4170,  0.2488,  0.1365,  0.2650, -0.6662, -1.1031,
        -0.0369,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3449,  9.0954,  2.7071,  1.7020,  0.1524,  0.3838,  0.2635, -0.3277,
         0.1763, -0.1273,  0.0547,  0.1496, -0.0432,  0.1953,  0.2854, -0.0232,
        -1.6572,  0.0819,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1519e-01, -7.0828e+00, -8.2522e-01,  2.2476e-01, -5.1539e-03,
         9.8655e-02,  2.3013e-01, -2.9321e-02,  4.0090e-02, -2.2812e-01,
         2.5588e-01,  1.0861e-01,  2.4059e-03,  2.8685e-01,  3.5345e-01,
         1.0162e-02,  2.2319e-01,  1.1092e-01, -2.4393e-01,  2.9992e-01,
         9.4699e-02, -1.1030e-01,  6.0875e-02,  5.8775e-02,  7.1244e-01,
        -4.6626e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3359e-01, -4.3073e+00, -1.1682e-01,  2.1314e-01, -3.0278e-02,
         3.2835e-01, -4.9234e-01,  7.7556e-02,  4.3920e-01,  2.5306e-01,
        -8.3341e-02,  3.9298e-02,  1.4596e-01,  2.7252e-01,  1.3900e-01,
         4.5409e-02, -3.7907e-01, -1.7181e-02, -3.3860e-01, -2.1871e-01,
        -1.7689e-01, -2.1055e-01, -6.6001e-02,  1.5889e-01,  2.8765e-03,
        -2.0664e-01, -9.3624e-02,  1.8279e-01,  2.5863e-01, -3.8581e-01,
         1.3409e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4531,  5.2827,  0.7587, -0.2085,  0.4206,  0.0423, -0.0724,  0.3436,
         0.0940, -0.4048, -0.5854, -0.1467, -0.1123, -0.1600, -0.2542,  0.0743,
        -0.5604, -0.3117,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9217,  6.9510,  0.3213, -0.3807, -0.4037, -0.4596,  0.9306,  0.8102,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.1836, 15.1768,  1.1066, -0.5105,  0.2861,  0.0531,  0.4716,  0.2031,
         0.0532, -0.1900, -0.2149, -0.3330, -0.0310, -0.2099, -0.1829, -0.1897,
         0.0473, -0.2761, -0.0422, -0.1053, -0.3349, -0.5063, -0.4150, -0.2727,
        -0.0837,  0.0395, -0.3457,  0.0885, -0.1241,  0.1484,  0.5006,  0.0193,
        -0.0636,  0.0717,  0.4732,  0.1623, -0.4164, -0.4258,  0.3601,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4057e-02,  2.8823e+00,  6.9476e-02,  7.3936e-02,  4.0279e-02,
         9.7948e-02,  3.6431e-02, -1.9358e-03,  1.2493e-01,  2.0306e-01,
         1.4994e-01, -4.1187e-02, -2.6401e-01, -4.3992e-02, -1.3192e-01,
         1.2532e-01,  1.2281e-02,  1.5466e-03, -2.3335e-02,  4.4366e-02,
         1.0771e-01, -1.7866e-01,  7.7725e-02,  2.2173e-02, -5.1057e-02,
         4.4925e-02, -1.7097e-01,  7.8194e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8745,  5.3870,  0.9094, -0.1879,  0.1213, -0.2102,  0.4891,  0.2962,
         0.0306,  0.2726, -0.2012, -0.8893, -0.3357,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7184,  5.5169,  0.3337, -0.5073, -0.1353, -0.0117, -0.1997, -0.0889,
         0.0929, -0.1198, -0.0661, -0.1080, -0.0802, -0.4368, -0.1963,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0580,  5.0213,  0.6638, -0.1256,  0.5876, -0.1951, -0.4483, -0.0812,
        -0.1191,  0.0795, -0.0751, -0.0534,  0.0774,  0.3465,  0.2486, -0.0160,
        -0.1337, -0.0598,  0.0075, -0.0713,  0.0614,  0.0380,  0.1220,  0.0715,
        -0.2024, -0.0970,  0.0277,  0.0300,  0.2883, -0.0189, -0.0408,  0.0905,
        -0.3589,  0.1350,  0.1622,  0.0070,  0.2414,  0.1551,  0.0575,  0.0748,
         0.0063,  0.0780, -0.1212,  0.0388, -0.1552,  0.2387, -1.3242],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2775e-01,  2.8441e+00, -4.8068e-04, -9.4231e-02, -4.2189e-01,
        -1.2608e-01, -4.2776e-02,  1.2358e-01,  4.5754e-02, -4.5942e-02,
        -2.4708e-02,  7.4084e-03, -3.5171e-03, -6.1047e-02,  3.3709e-02,
        -2.6139e-02,  4.8296e-02, -5.7390e-02, -2.4404e-02, -4.0564e-03,
        -1.8438e-02,  1.4415e-03, -7.7045e-02, -2.1055e-02, -1.5352e-02,
        -2.7666e-02,  5.6528e-02,  5.3547e-02, -1.6487e-02, -1.6280e-01,
        -2.1011e-03,  1.9136e-02, -8.1733e-02, -6.9202e-03, -3.1474e-02,
        -4.5872e-02,  2.5237e-02, -1.1339e-02, -1.8604e-02, -1.2213e-01,
        -9.8907e-02,  7.3807e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3532e-01,  1.4892e+01,  1.3781e+00,  1.7526e-01,  6.0726e-02,
         9.5683e-01,  5.6481e-01,  7.5528e-01, -6.5736e-02,  5.0562e-02,
         4.5869e-01, -6.0684e-01,  5.2015e-01,  4.7039e-01,  1.4478e-01,
         5.3517e-01,  1.4906e-01,  1.4085e-01,  8.6494e-02, -5.0611e-03,
        -2.1965e-01, -5.3498e-01, -1.9409e-01, -3.5988e-01,  2.3234e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-10.1175,  11.2266,  -0.2531,   1.0628,   0.1394,  -0.0565,   1.6292,
         -0.2410,  -0.4966,   0.5715,   0.1490,   0.0289,  -0.1785,   0.6453,
         -1.3262,   0.5522,   0.4368,  -1.1445,  -8.3803,   2.7408,   7.8015,
         -4.4079,  -1.8211,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6237, 10.7673,  0.2649,  0.1751, -1.4161,  0.4255,  0.1118, -0.3023,
        -0.1152,  1.7040,  0.8039, -0.3757, -0.1329,  0.3688, -0.1512, -0.0221,
        -0.3261,  0.3724, -0.1263, -0.3996,  0.0378, -0.8827, -0.2296,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5286, -7.6517, -0.6384, -0.5389, -0.0421,  0.1098, -0.6712, -0.9505,
        -0.4227, -0.0533, -0.1731,  0.7338,  0.6487,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4890,  7.0618,  0.2547, -0.3357, -0.2350,  0.0374, -0.3857, -0.2667,
        -0.3013,  0.2388, -0.1245, -0.1154, -0.9457, -0.1377,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.8082, -12.1951,   0.5692,   0.5798,  -0.2643,  -0.3521,   0.0600,
         -0.4181,  -0.2722,  -0.0786,   0.2281,  -0.1696,  -0.0241,  -0.0696,
         -0.0472,  -0.3381,   0.7830,   0.5607,   0.2948,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.1612e+00,  8.0550e+00, -2.7749e-01,  2.6936e-01,  1.9119e-01,
        -2.1720e-02,  1.2944e-01,  9.6148e-02, -1.0224e-01,  1.7900e-01,
        -2.0355e-01, -1.5893e-01, -8.6420e-02, -8.4867e-02,  2.1815e-01,
         3.5929e-02,  3.4741e-02, -1.7963e-01, -1.3646e-01, -6.6391e-02,
        -4.2621e-02, -7.6638e-02,  6.4283e-03, -8.6713e-02,  6.9598e-01,
        -3.8140e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3928e+00, -6.0215e+00, -9.8919e-01,  5.7163e-01, -6.1815e-01,
        -2.3681e-01, -6.4904e-02,  5.0630e-02,  1.0034e-01,  2.2992e-01,
         1.6363e-01,  1.3826e-02,  2.7450e-03, -4.2182e-01, -1.8462e-01,
        -2.9843e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.8228, -10.2490,   0.7150,  -1.7427,  -0.4390,  -0.4642,  -0.7474,
         -0.6034,  -0.4643,  -0.6207,  -0.3426,  -0.0486,  -0.1812,  -0.8890,
         -0.8482,  -0.2297,  -0.6133,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2651e+00,  8.3634e+00,  6.1677e-01, -1.9519e-02, -4.5938e-01,
         3.3291e-02, -1.2274e-03,  6.1483e-02, -3.3846e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9192e-01,  2.9046e+00,  5.0617e-01,  1.5256e-01, -3.5213e-01,
        -9.7711e-02, -5.7120e-02,  5.7210e-03,  2.1244e-02, -1.3586e-02,
         8.2350e-02,  1.7651e-02,  3.4862e-02, -6.4300e-02,  1.7822e-02,
        -6.3670e-02,  1.6400e-01,  2.7651e-02, -2.4440e-02,  1.1895e-01,
         2.2568e-01, -1.2626e-03,  5.5630e-02,  6.7252e-02, -5.9079e-02,
         4.6562e-02,  1.3172e-01, -2.8798e-02,  1.9134e-02, -8.4993e-03,
         1.1583e-02, -8.5673e-02, -1.1566e-02,  3.2477e-02, -1.8676e-02,
         1.2791e-02,  1.0034e-01,  2.8091e-01, -3.6211e-02,  1.1907e-02,
         1.8827e-02,  1.8989e-02, -2.2573e-01, -4.1389e-02, -2.1999e-02,
         1.1769e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6246e-02,  4.5541e+00, -3.9551e-01, -9.5828e-02, -2.1965e-02,
        -7.0677e-02,  3.2260e-01, -2.8760e-02,  2.4312e-01, -5.9096e-01,
        -3.0096e-02, -2.8306e-02,  6.3706e-02,  7.8306e-02, -1.1481e-02,
         4.3641e-02,  1.8536e-02,  2.2756e-04, -3.1370e-02,  1.8029e-02,
         1.3662e-01,  1.4932e-01,  1.7970e-01, -2.2563e-02, -1.5317e-01,
         1.7578e-01,  2.3784e-01, -2.4031e-02,  3.7715e-02, -1.8681e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.5746, -13.9631,  -0.6473,  -0.4191,  -0.7034,   0.4311,  -0.7450,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7700, 11.6258,  0.2287,  1.0594,  0.1177, -0.1918,  0.1815, -0.3251,
        -0.2273, -0.7842,  1.2131,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4904,  5.6558,  0.1162,  0.0865, -0.0362, -0.1735,  0.0833, -0.3958,
        -0.1116, -0.1870, -0.2053, -0.2542, -0.4464, -0.7143,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9877, -9.5030, -1.3768, -0.2327,  0.1263,  0.2724,  0.2216, -0.0148,
         0.1998,  0.0888, -0.0888, -0.0269,  0.2833,  0.3881,  0.0875,  0.2631,
         0.0544,  0.0538,  0.0577,  0.2910, -0.1127, -0.0656,  0.2402,  0.1472,
         0.2287, -0.0507, -0.0888,  0.0964,  0.1489,  0.3934,  0.0298,  0.1656,
        -0.4212, -0.6719,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6278,  5.2149,  0.3485, -0.3861, -0.2401,  0.2976,  0.3220, -0.1527,
         0.3091, -0.5347,  0.1196, -0.0389,  0.1504,  0.2719,  0.2744, -0.3433,
         0.0573,  0.1230,  0.0761,  0.1014,  0.1279, -0.1209,  0.0175,  0.1376,
        -0.0296, -0.1888, -1.5988,  0.8669,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8462, 10.2864,  2.7304, -1.7449, -0.5817,  0.5510,  0.2979, -0.1016,
         0.5825, -0.1197, -0.1409, -0.4479, -0.1872, -0.2099, -0.1803,  0.0576,
        -1.6662, -0.5403,  0.4415,  0.0984, -0.0534, -0.1740,  0.7569,  0.2725,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-4.4770, -6.5192,  0.5966,  1.2042,  0.6870,  0.1382, -0.0272, -0.0106,
         0.0905, -0.0308,  0.0480,  0.6688, -0.1470, -0.0547,  0.1269, -0.0067,
         0.0220,  0.5497,  0.0377,  0.0715, -0.1680,  0.1401,  0.2086, -0.0145,
        -0.1841, -0.0270,  0.4909,  0.1068,  0.0709, -1.1912, -0.0741,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2350, -9.4138,  0.6898,  0.6657,  0.9390,  0.0874, -0.2176, -0.0682,
         0.2610,  0.0274, -0.0775, -0.7712,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6497, -5.3512,  0.1274,  0.7315, -0.0895, -0.1525, -0.3924, -0.0056,
        -0.0722,  0.5265, -0.3776, -0.3165, -0.4721,  0.1512, -0.1444,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6468e-01, -5.9250e+00, -1.4072e-01,  2.7550e-01, -7.5056e-01,
        -1.7843e-01, -3.5526e-01, -4.7852e-02, -3.7688e-01, -3.2049e-02,
         3.2842e-01,  2.6835e-02,  1.5442e-01,  1.9775e-01,  7.6590e-02,
        -1.3409e-02,  2.8666e-02, -2.2966e-01, -1.4493e-01, -2.3095e-01,
        -4.9671e-02, -7.9226e-02, -9.6370e-02, -3.0494e-02, -4.8905e-03,
        -1.4438e-01,  1.1727e-01, -1.2069e-01,  8.8678e-02, -4.8087e-03,
        -2.0232e-01,  2.1607e-01, -1.9065e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4702,  7.3135, -1.4075, -1.0795,  0.4839,  0.4626, -0.2443,  0.0776,
         0.2598,  0.1208, -0.1017,  0.4126, -0.1132, -1.0868, -1.5677, -1.3870,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8347,  5.3459,  0.4908,  1.2262,  0.3189,  0.9861, -0.9371, -1.0985,
         0.3917,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8829,  2.2707,  0.1661, -0.2466, -0.2145,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9624,  9.1092,  0.5064,  0.2847, -0.0994, -0.3311,  0.1294, -0.1170,
        -0.1723,  0.2144,  0.3401, -0.1289, -0.1535,  0.0825,  0.0984,  0.2520,
         0.1343,  0.2384,  0.1248, -0.0509, -0.0171,  0.0127,  0.0997, -0.3810,
         0.2062, -0.0436,  0.7186,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3038,  4.9828, -0.3660,  0.2638, -0.0518,  0.3637, -0.6065,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  4.3025, -15.3272,  -0.4671,  -0.4103,  -0.6184,  -0.4833,   0.2377,
         -0.0762,  -0.1398,  -0.1608,  -0.1550,  -0.3160,   0.1010,   0.1660,
          0.4702,   0.1353,  -0.5944,   0.3035,  -0.2251,  -0.9473,   1.4493,
          0.0396,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9381, -7.0828, -0.5824,  0.2658,  0.2757, -0.1364,  0.1239,  0.1372,
        -0.2040,  0.2557, -0.4557, -0.0854, -0.2421, -0.4225,  0.0130, -0.4672,
        -0.3986,  0.2056,  0.7570,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1260,  5.8587,  0.4815,  0.1341,  0.4129, -0.4754, -0.1800,  0.1438,
         0.2730, -0.0683, -0.2199,  0.0430, -0.0898, -0.0755,  0.0400,  0.0768,
        -0.0965, -0.0354, -0.1282, -0.3415,  0.1145,  0.0508,  0.0079,  0.1157,
         0.1137, -0.0280, -0.0434,  0.0489, -0.3065,  0.0774, -0.3882,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.4104, -9.4710, -1.4048,  0.0284, -0.9647, -0.2255, -0.6571, -0.3012,
        -0.1747,  0.0516, -0.3538, -0.1662,  0.9462, -0.2558,  0.0102, -0.7509,
         0.3024, -0.9310,  1.0177, -0.2864, -0.1064,  0.1393,  0.0530,  0.9923,
        -1.2232,  0.0180,  0.0265,  0.1451,  0.3295,  0.1553, -0.1025, -0.3376,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3496, -4.2931, -0.8396, -0.1442,  0.2711, -0.0827, -0.3884,  0.1015,
        -0.5470,  0.0159, -0.1121, -0.1122, -0.0728, -0.4486,  0.0156,  0.2397,
         0.0867, -0.1770, -0.1115, -0.1510,  0.0664,  0.2486,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1920,  4.0071,  0.0281,  0.1116,  0.1295,  0.2231, -0.6909,  0.5866,
         0.3731,  0.0464,  0.0908,  0.1338,  0.0191,  0.0635, -0.1265, -0.0215,
        -0.1169,  0.0333,  0.0454,  0.1655,  0.1549, -0.0328, -0.0729, -0.1407,
        -0.0080, -0.0566, -0.0044, -0.3698,  0.2703, -0.0489,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.4769, -11.4123,   0.0121,  -0.4743,  -0.0446,   0.4928,  -0.3831,
         -0.2808,   0.4510,   0.9193,  -0.1294,   0.5838,  -0.1000,  -0.1132,
         -0.0815,  -0.0497,  -1.1937,  -1.1985,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7763e+00,  7.0075e+00,  5.8024e-01,  4.3576e-01,  4.0891e-01,
         2.6872e-01, -1.5237e-01,  1.2188e-01, -4.7699e-02, -6.9589e-02,
         1.2524e-01, -6.8754e-05, -8.6555e-02,  1.5292e-02, -2.5259e-02,
        -1.4244e-01,  2.0179e-01, -2.7930e-03,  3.1210e-01,  2.1764e-01,
        -1.1315e-01, -1.3189e-03, -1.7809e-01,  2.5642e-02, -1.5809e-01,
        -7.0148e-02, -2.9213e-01, -1.8665e-02,  4.6697e-04,  5.1070e-02,
         1.0922e-01, -3.1097e-02, -1.0779e-03, -3.2395e-01, -1.4944e-01,
         1.9194e-01, -1.4032e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8836e-01, -4.7165e+00, -5.8583e-01, -1.1260e+00, -7.0753e-02,
        -4.8930e-02, -1.3936e-01,  3.0974e-02, -3.5271e-02, -1.2744e-01,
         9.3676e-03, -7.4957e-02, -1.7623e-01, -1.2644e-01,  3.2223e-02,
         9.2300e-02, -2.1039e-01,  2.0908e-03, -1.7499e-01,  6.0416e-02,
        -3.1489e-01, -4.9399e-02, -2.8568e-01, -1.2780e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1396e-01,  5.6001e+00,  1.7538e-01, -2.3615e-01,  3.1512e-01,
        -5.2346e-02,  2.3777e-01,  1.3025e-01,  2.3000e-01,  2.5977e-01,
         1.1987e-01, -2.6782e-02, -2.2187e-01,  6.3526e-02,  1.2404e-01,
         2.5310e-02, -2.5716e-01,  7.8228e-02,  3.8463e-03, -2.3243e-02,
        -4.0317e-02, -1.1125e-01, -1.3582e-01, -9.3609e-02,  1.1932e-01,
         3.2669e-02,  2.6260e-02,  1.5346e-01, -5.6364e-02,  9.6755e-03,
         4.5736e-01, -7.1175e-03, -1.2525e-01,  1.0569e-01,  1.7137e-01,
        -1.7229e-01, -1.0177e-02, -8.3263e-02,  3.1286e-02,  4.0121e-02,
        -1.0466e-01,  5.9051e-02, -1.0521e-02, -1.1359e-01, -4.9375e-01,
        -1.5579e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3475, -5.9825, -0.6344, -0.0471, -0.2450, -0.3361, -0.4957, -0.1605,
         0.0987,  0.0229,  0.1249,  0.0919, -0.4674,  0.0704,  0.0652,  0.4039,
         0.0974,  0.0898,  0.3742, -0.0794, -0.1336,  0.1667, -0.1777,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4994, -9.6737,  0.1639,  0.3777,  0.0557,  0.0525, -0.0384, -0.4571,
         0.3688,  0.9411,  0.4428,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1517,  6.9423,  0.8052, -1.0277, -0.1997,  0.9730,  0.7176, -0.1069,
        -0.5155, -0.1574,  0.2849, -0.0959,  0.5939,  0.4631,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1153, -9.0291, -0.6785,  1.3835, -0.8469, -0.2970,  0.0624,  0.2536,
         0.5865, -0.1455, -0.1565, -0.5157,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6793e-01,  9.0445e+00,  1.6477e+00, -8.4278e-01,  2.3118e-01,
         3.8373e-01, -2.2934e-01,  4.6911e-01,  3.4548e-02, -4.3637e-02,
        -1.0574e-01, -2.6878e-01, -2.7198e-02, -1.7296e-02, -3.6776e-03,
        -1.7553e-01, -2.7572e-01, -6.6121e-02, -6.7446e-02, -3.7390e-01,
        -1.6965e-01,  5.5742e-02, -1.0963e-02,  6.9259e-02,  3.6136e-02,
         1.2085e-01,  4.3441e-01, -6.1742e-02, -6.1371e-02,  6.1320e-02,
        -1.2934e-02, -2.5782e-01,  1.7482e-01,  1.0587e-01,  1.3585e-01,
        -7.8129e-02, -5.0052e-02, -1.3164e-01,  4.9597e-02, -8.7840e-02,
         3.5976e-02,  1.0747e-01,  1.0378e-01,  2.4052e-02,  4.7776e-02,
         2.0434e-02, -2.1838e-02, -1.0531e-02,  2.6224e-02, -1.2300e-01,
         8.2354e-03, -4.9273e-02,  4.4943e-02, -3.9294e-02,  1.9153e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.2943e+00, -7.0295e+00, -4.5387e-01, -1.6394e-02,  1.9116e-01,
         4.9802e-01, -7.4497e-02,  1.2084e-01,  2.3232e-01,  3.9825e-02,
         3.6281e-01,  6.5280e-02, -5.7872e-03,  2.8009e-01,  3.8615e-01,
         1.0038e-01,  4.5908e-02,  4.6195e-03, -3.2164e-03,  1.5208e-01,
         2.0697e-01, -1.8427e-02, -8.5827e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7818, -2.8346, -0.2011, -0.1337, -0.2143, -0.0199, -0.0105, -0.1948,
        -0.0757, -0.0671,  0.3227,  0.0049,  0.0846,  0.2183,  0.1361,  0.1100,
         0.1160,  0.2000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6861,  2.5723, -0.1141, -0.2518,  0.0108,  0.1611,  0.5922,  0.0111,
         0.0247, -0.0243,  0.1049,  0.0448,  0.2041, -0.0522,  0.1128,  0.0958,
        -0.0943, -0.1687,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3617, -4.1488,  0.2197,  0.1482,  0.2511,  0.7648,  0.0578,  0.6010,
         0.5970, -0.0204,  0.0647, -0.1483, -0.0350,  0.6766,  0.1228,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.0018, -11.7591,  -0.7443,  -0.4409,   0.0464,  -0.7313,   0.6713,
          0.2413,  -0.4889,  -0.1810,   0.2208,   0.2973,   0.0133,  -0.4127,
          0.5578,   0.9493,  -0.6895,  -0.2637,  -0.1613,  -0.7243,  -0.0173,
          0.5543,  -0.0523,  -0.1389,  -0.1796,   0.1606,  -0.2723,  -0.0849,
          0.1248,  -0.0281,   0.5971,  -0.5062,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3323e-01, -7.4543e+00, -1.4774e-01, -3.2325e-01,  2.4324e-01,
         8.3971e-02, -3.2180e-02, -1.5418e-02, -1.8880e-01, -1.5240e-02,
        -2.6443e-01,  8.9623e-02, -4.2612e-02, -1.7600e-01,  2.0444e-02,
         2.6031e-01, -1.9920e-02, -1.9080e-02, -2.4733e-03,  4.0718e-01,
         2.1107e-01, -2.1334e-01,  1.0776e-01,  2.6105e-01,  8.9204e-02,
         2.6677e-01,  1.3876e-01, -1.4163e-01,  1.0849e-01,  2.4414e-01,
        -8.2786e-02,  8.6145e-02, -1.0255e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7560e-01,  4.6503e+00,  2.2747e-01,  2.0501e-01,  2.6719e-01,
        -2.9083e-01, -1.0479e-02,  1.3293e-01,  6.3156e-02,  6.7916e-02,
        -1.5011e-01, -1.0309e-01, -1.9355e-01,  2.4838e-01, -1.2765e-01,
        -7.2551e-02,  5.5279e-02,  3.2158e-02,  1.2721e-02, -5.0064e-02,
         1.5018e-03,  9.4101e-04, -1.5642e-02, -1.0438e-01,  1.9309e-02,
        -7.6112e-02,  1.0416e-01, -5.5001e-02,  8.7561e-03,  8.2715e-02,
         7.8070e-02,  2.9155e-02, -6.6160e-02, -1.2929e-02,  9.1493e-03,
        -9.1391e-02,  9.5607e-04, -3.2726e-02, -8.9496e-03, -4.8423e-02,
         1.9551e-02,  9.2697e-04, -5.4530e-02, -8.2487e-02, -5.8170e-02,
         6.8084e-02, -4.9257e-02,  1.0010e-01, -8.0731e-02,  8.3099e-02,
        -1.3738e-01,  1.4935e-02, -1.1581e-01, -1.0257e-01, -2.7013e-02,
        -1.1806e-01, -2.3681e-01, -1.6100e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0927e+00,  7.5913e+00,  9.2153e-01, -5.8869e-03,  2.9864e-02,
        -1.5078e-01, -1.7013e-01,  1.8910e-01, -1.2043e-01,  4.8386e-02,
        -2.0569e-01, -2.1531e-01, -3.3429e-01, -4.5051e-02, -3.5737e-02,
        -1.9946e-02, -4.6159e-02, -8.2581e-02,  7.6963e-02,  1.7664e-01,
         5.8059e-02, -5.6393e-02, -2.9533e-01, -1.9837e-01, -1.8024e-01,
        -9.4894e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8452e+00,  1.1440e+01, -5.3793e-04,  5.5286e-02,  3.2107e-03,
        -2.9033e-01, -4.9015e-01, -1.1699e-01, -6.7765e-01,  1.6687e-01,
         1.1039e-01, -6.6026e-02, -1.6128e-01, -3.9375e-02, -4.2151e-02,
         5.5047e-02,  2.7066e-01, -1.6156e-01, -3.6904e-02, -2.4659e-01,
        -1.9064e-01, -9.0618e-02, -3.3982e-01,  7.3936e-01, -8.4162e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2450,  2.9801,  0.7784,  0.1446, -0.4684, -0.1728,  0.4369, -0.1212,
         0.0489,  0.3576, -0.1740,  0.0865,  0.3232,  0.0156,  0.0170,  0.2148,
         0.0557,  0.0431,  0.0117,  0.0823, -0.0882,  0.4771, -0.0265,  0.0115,
         0.1546, -0.0032, -0.0215,  0.0725,  0.0293,  0.0327, -0.0756, -0.1514,
        -0.2224, -0.0146,  0.0871,  0.0273,  0.0146,  0.0440, -0.0847, -0.0596,
        -0.0151, -0.0062, -0.0902, -0.1094, -0.3169,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5516,  4.5429, -0.1864,  0.1053,  0.0441, -0.2367,  0.2158,  0.1845,
        -0.3968,  0.1162, -0.1503,  0.0789, -0.1022, -0.0729,  0.2033,  0.2185,
         0.3540, -0.0550, -0.1735, -0.0108,  0.1478, -0.2711,  0.2030,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2230,  9.8944, -0.9785, -0.7900,  0.0504,  0.1973,  0.0638,  0.1056,
        -0.2162,  0.1018, -0.1971, -0.1016,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.7477e+00, -1.5659e+01, -1.6885e+00, -3.5037e-01, -2.3234e-02,
        -6.8678e-01, -1.1080e+00, -5.5759e-01, -1.2988e-01,  3.8170e-01,
         1.3805e-01, -3.3218e-01, -1.7977e-01, -2.6801e-01,  9.1637e-03,
        -4.5359e-01,  3.5589e-01,  2.4253e-01,  1.5362e-01,  7.4734e-02,
         8.7220e-01,  3.0025e-01, -4.0456e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0644, -5.4372, -0.0252,  0.0899,  0.1782,  0.3026, -0.0283, -0.0641,
        -0.0369, -0.1062, -0.0457, -0.0887, -0.0891, -0.1940, -0.1277,  0.0739,
         0.0079,  0.0237,  0.1175,  0.0264, -0.1129, -0.0203,  0.0429,  0.0214,
         0.0386, -0.0802, -0.4562, -0.0491,  0.0165,  0.0135,  0.0535,  0.3353,
        -0.3134], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1374, -8.3666,  0.8544, -0.6264,  0.9376,  0.1988,  1.3069,  0.3671,
         0.2234,  0.0716,  0.2316, -0.1031, -0.0167, -0.7451, -0.0712,  0.2793,
         0.0432,  0.2627,  0.2296,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2369, -5.5134, -0.1763,  0.2524,  3.9410, -0.2069,  0.0236, -0.1019,
         0.5362,  0.2822,  0.7024, -0.1864, -3.0984,  0.1898,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2719e-02, -5.5662e+00, -1.7375e-01, -2.7107e-01, -1.5696e+00,
        -3.9946e-01,  1.1643e-01,  4.3677e-01,  7.1253e-02,  7.3207e-02,
         4.1713e-03, -8.0457e-02, -2.3045e-01,  3.4977e-01, -6.3666e-02,
         2.4307e-02,  2.0612e-01, -4.0664e-02, -2.9186e-02, -1.0702e-01,
         7.6880e-01,  1.5858e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2423,  3.0717,  0.3082,  0.2530,  0.1711, -0.0991,  0.0075, -0.1815,
        -0.0106, -0.1431,  0.0488,  0.0083, -0.1763,  0.0282, -0.0137,  0.0085,
        -0.0594, -0.0163,  0.0572, -0.0950, -0.0197,  0.0199,  0.0631,  0.0431,
        -0.1249, -0.0476, -0.0554,  0.0476, -0.2463, -0.0308,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9498,  4.5939,  0.5716, -0.7751,  0.1898, -0.5865,  0.0866,  0.6734,
        -0.3252, -0.3784, -0.2179, -0.5614, -0.1160,  0.0270,  0.5453, -2.1493,
        -0.1839,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.2746, -12.1681,  -1.1586,  -1.6189,  -0.1249,   0.1920,  -0.2750,
          0.8833,   0.1079,   1.1221,   1.1034,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1229e-01, -2.5927e+01, -1.6415e+00,  5.2889e-01, -1.5034e-01,
         3.2626e+00, -7.1746e-01, -2.1809e-01,  3.3303e-01, -3.6356e-01,
         2.1502e-01, -3.7387e-02,  5.6904e-01, -1.4987e+00, -1.2572e-02,
         1.1100e+00,  6.2677e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9910,  4.1545,  0.1266,  0.2946, -0.4217, -0.0441, -0.1680,  0.0827,
         0.0660, -0.1427, -0.1274,  0.0712, -0.2467,  0.0394, -0.6193, -0.1029,
        -0.0122, -0.0850,  0.0287, -0.1472, -0.1354, -0.1290,  0.1095,  0.0137,
        -0.6143, -0.6122,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1406,  3.4039,  0.5292,  0.0223, -0.1955,  0.0661, -0.0275,  0.1020,
        -0.1345, -0.1011, -0.0536, -0.0938, -0.0210,  0.0763, -0.0597,  0.0117,
         0.0311,  0.0960, -0.0463, -0.0318,  0.0198,  0.0042,  0.0540,  0.0245,
         0.0568,  0.0318,  0.0339,  0.0607, -0.2250,  0.1155,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5556e-01, -4.1720e+00, -1.2363e-01, -6.2667e-01,  6.8926e-02,
        -1.0835e-01,  1.0989e-01,  1.1785e-01,  1.7512e-01,  1.7610e-01,
        -3.0519e-03,  6.2668e-02,  1.4046e-01, -1.1050e-01, -3.0588e-01,
        -1.0075e-01,  1.1147e-01,  3.4097e-01,  9.5061e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-9.9720e-02, -2.9131e+00, -5.5060e-01, -3.2012e-01, -1.1742e-01,
        -6.2986e-04,  1.2210e-01,  5.8977e-03, -7.4095e-02,  1.8656e-02,
         1.1249e-02,  4.4007e-02, -8.0262e-02, -6.5102e-02,  3.3437e-02,
         3.6623e-02, -1.3037e-01,  4.5215e-02, -7.9096e-02, -7.3895e-02,
        -1.7783e-02,  1.8159e-02, -1.4327e-02, -4.5936e-02, -3.0582e-02,
         1.7673e-01,  1.5028e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4848e-01,  5.5250e+00, -7.3598e-01, -9.5130e-01,  1.0108e-02,
        -6.2272e-01, -2.7125e-01,  4.6681e-03,  3.5920e-01,  2.3867e-01,
        -1.1637e+00, -2.0906e-01,  4.7499e-01, -1.0130e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8726e-01,  8.7886e+00,  6.7472e-02, -1.9716e-01, -4.8040e-03,
         2.0353e-01,  2.5551e-01, -1.7370e-02, -2.0311e-01, -2.0141e-01,
        -1.3019e-01, -2.8823e-01,  2.2158e-01, -1.2416e-01, -1.6548e-01,
         3.1248e-01,  7.3527e-02,  8.7563e-02, -3.2728e-01,  4.4093e-02,
        -1.3272e-01, -7.2521e-02, -1.5401e-01, -2.1778e-01, -4.0640e-01,
        -1.1056e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2278,  4.7353,  0.2474, -0.1183, -0.1185, -0.3500,  0.0288, -0.2481,
        -0.3886,  0.4364,  0.1658, -0.1761, -0.0689,  0.0316,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7480e-01,  7.9446e+00,  2.2229e-01,  8.6883e-02,  3.0015e-01,
        -6.6598e-01,  9.3676e-02, -1.7738e-02,  1.3379e-01,  1.7237e-01,
        -1.6785e-01, -1.5621e-01, -1.5865e-01, -3.5593e-01,  2.2676e-01,
        -5.0287e-03, -6.0004e-02, -8.1785e-02, -1.9206e-01,  2.9775e-01,
        -1.3286e-01,  5.7163e-02, -1.9733e-01,  9.0750e-02,  9.9563e-02,
        -9.4235e-02, -1.5533e-01, -5.0417e-01,  1.1557e-01, -1.1742e-01,
        -9.6189e-02,  1.3343e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3558e+00,  1.0882e+01,  8.2728e-01, -4.6568e-01, -4.5872e+00,
        -1.7218e-02, -5.9028e-01,  1.4059e-01, -4.7835e-01, -4.3701e-01,
        -3.9374e-01,  3.3851e-01,  1.6839e-01, -2.0893e-01,  4.1991e-01,
        -6.4059e-02, -6.3597e-03,  9.3407e-02,  4.0526e-03,  1.6938e-01,
         4.6150e-02,  6.5225e-02, -1.5117e-01,  2.1819e+00, -5.0624e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5224,  8.9863,  0.9312, -1.0247, -0.5143,  0.5692,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7539e-01,  5.6150e+00,  7.3044e-01, -1.7322e-01,  1.5147e-01,
         1.0784e-01,  8.8958e-02, -5.7060e-02,  3.0075e-02, -1.8307e-03,
         7.0163e-02,  3.8312e-02, -2.2428e-02, -4.9154e-03,  1.1135e-02,
        -1.3021e-02,  6.6036e-02,  1.2830e-01,  6.9032e-02,  5.2984e-02,
        -9.7433e-02, -9.2040e-02, -9.0350e-04, -8.7115e-03,  4.8169e-02,
         4.9243e-02, -2.1921e-02, -3.2263e-02, -5.8675e-02,  1.2448e-02,
         1.1682e-01,  7.0885e-02, -2.6209e-02,  3.8307e-02, -2.3669e-03,
         1.9910e-01,  1.3357e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6572e-01, -7.4235e+00,  9.1022e-01, -4.8352e-01,  6.3226e-01,
        -3.6957e-01,  6.2555e-02,  6.1295e-02,  3.5012e-04, -8.6468e-02,
         1.7167e-02, -1.8860e-01, -1.9201e-01, -2.7989e-02,  2.0641e-01,
         1.9059e-01,  1.6960e-01, -1.8159e-01,  9.3936e-02,  1.8553e-01,
         3.8772e-01,  4.6284e-03,  2.1763e-02, -1.2396e-02, -3.6357e-04,
         1.0282e-01, -1.9125e-01, -2.3159e-02,  1.6367e-01,  1.5519e-01,
         4.1449e-02, -4.0428e-04, -1.4903e-03,  1.0806e-01, -9.7595e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2009, 10.9637, -8.3617, -1.8512, -0.4078,  0.3840,  2.0732, -0.1804,
        -1.7538, -1.2896,  0.2578,  0.0373, -0.2726,  1.4051,  2.7851,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5990e-01,  7.0743e+00,  5.1774e-01, -5.9311e-01,  1.2984e-01,
         2.4364e-02,  1.8868e-01, -2.9297e-01,  1.0401e-01,  1.2597e-01,
         4.2579e-02, -2.4874e-02, -2.0028e-02, -1.2346e-01,  9.6433e-02,
        -2.4131e-01, -1.5540e-02,  1.8336e-01, -7.0840e-02, -8.8492e-02,
        -2.9329e-02, -2.1005e-01, -8.3134e-02, -2.4804e-02, -5.3896e-02,
        -3.7412e-02, -6.5664e-02, -4.0443e-02, -8.8173e-02, -5.0332e-03,
        -2.0234e-01,  1.7869e-01, -1.1994e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6642e-02,  1.4265e+01,  2.6362e-01, -6.3642e-02, -2.6099e-01,
        -5.8907e-01,  1.5921e-01, -9.9246e-02, -5.4007e-01, -1.8382e-01,
        -3.8778e-01, -4.1445e-01, -2.9322e-01, -9.9513e-02, -3.0012e-01,
        -1.3980e-02, -1.7118e-01, -1.8808e-01,  1.5629e-01, -1.8427e-01,
        -5.9273e-02, -5.8170e-02, -1.8453e-01,  1.0825e-02,  5.7073e-02,
        -2.2588e-01, -5.0637e-01,  1.3219e-01,  7.9054e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.4609, -9.8655,  1.6550,  1.2948,  0.6708, -0.2133,  0.7658, -0.0347,
        -0.0925,  1.0242,  0.0103,  0.4052,  0.3288,  0.7416,  1.9944,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3045e-01,  5.6095e+00, -5.0907e-01, -4.4989e-01,  2.2721e-01,
        -2.0189e-01,  1.7350e-01,  2.9807e-01, -2.4570e-01, -2.6744e-01,
        -6.4722e-02, -4.6300e-02,  7.6028e-02, -5.8880e-02,  8.8142e-02,
        -2.1747e-01, -1.4147e-01, -5.3086e-02,  6.1052e-03, -3.9694e-02,
         3.0773e-03, -1.7291e-01, -1.0335e-01, -4.4788e-02, -7.7937e-02,
        -1.1045e-01, -3.4445e-02, -1.6180e-02, -1.9177e-03,  3.5664e-02,
         3.5736e-02,  6.7004e-02,  8.7953e-02, -8.7515e-02, -1.1966e-01,
        -8.9836e-02, -6.9363e-02, -4.1948e-02, -1.1705e-01, -7.7697e-02,
        -4.1320e-02, -8.6456e-02,  7.0581e-02, -6.3978e-02,  1.5505e-03,
        -3.3696e-02,  1.9921e-02, -2.9166e-01,  2.3486e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5525,  9.8103,  0.4739, -1.2126,  0.0631,  0.7660,  0.1792, -0.3428,
        -0.8464, -0.0390, -0.2004, -0.1401,  0.2105,  0.1402,  0.1930, -0.3077,
         0.3333,  0.0510,  0.0396,  0.3523, -0.0721,  0.0719, -0.0617, -0.1177,
         0.1358,  0.1195, -0.3206,  0.3532,  0.1695, -0.2420, -0.0410, -0.1214,
         0.3035, -0.4547, -0.2982,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1749, -6.1420, -0.0386,  0.0344,  0.2991,  0.2870,  0.2615,  0.2052,
        -0.0513, -0.1047, -0.2885,  0.1203,  0.0434, -0.0299, -0.0413,  0.0277,
         0.4101, -0.1418,  1.3190,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.7698, -12.9167,  -0.5373,  -0.0314,   1.9312,  -0.6449,   0.6886,
         -1.2052,   1.8296,   0.6116,   0.4341,  -0.2917,  -0.1043,  -1.2382,
          0.6871,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1658e-01, -6.1471e+00, -8.5667e-01,  7.1259e-01, -1.4942e-02,
        -2.5830e-01,  1.9494e-01,  4.7160e-03,  3.1774e-02, -1.9136e-02,
         1.6124e-01,  2.5209e-01,  2.6993e-01, -2.9244e-02,  4.5968e-02,
         2.7074e-01,  2.2921e-03,  8.8428e-02,  2.0609e-02, -3.3807e-01,
        -1.8401e-05,  6.0304e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1497e-02,  6.9940e+00,  2.3748e-01,  1.2240e+00,  5.6850e-01,
         4.3226e-01, -4.7636e-01, -2.1549e-01, -4.8144e-02,  1.9006e-01,
        -1.6803e-01, -5.6701e-01, -1.2281e-01, -3.4853e-01, -3.8842e-02,
        -5.8166e-02, -2.9229e-01, -1.7399e-01,  2.5272e-01,  3.0535e-02,
        -2.1561e-01,  2.8833e-01, -1.2326e-01, -3.7951e-02,  3.2554e-01,
        -2.2972e-02, -1.6227e-01,  3.9816e-02, -3.3452e-01, -3.2653e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5021, -6.3828,  1.5235, -0.1572,  0.4678, -0.5584,  0.8733, -0.1073,
         0.7277, -0.8423,  0.6425,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0386e+00,  1.3320e+01, -1.5103e+00, -2.4290e-01, -4.1503e-01,
         1.4636e-02, -6.2805e-02,  3.5220e-01,  3.7246e-01,  3.1536e-01,
        -1.0963e-02, -5.1714e-02, -1.0168e-01, -8.5799e-02,  2.1441e-02,
         1.3675e-01, -1.6242e-01,  2.7524e-03, -4.0513e-02, -1.4445e-01,
         5.2712e-01, -1.0731e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8936e+00, -3.2070e+01, -7.0903e-01, -1.2714e+00, -8.8507e-02,
         1.5035e-01, -2.1070e-01,  6.4133e-01,  3.6510e-01,  9.6082e-01,
         1.1634e+00,  1.0955e+00,  6.2114e-01,  3.9223e-01,  3.1071e-01,
         4.3185e-02,  5.0990e-03, -1.8376e-01, -5.6131e-01, -5.6199e-02,
         2.1952e-01,  1.3705e-01,  6.6674e-01,  1.2306e-01,  1.5317e-02,
         3.9122e-01,  6.0564e-01,  4.0209e-01,  5.9314e-01,  8.9182e-01,
         1.3735e-01,  8.0722e-01,  6.3571e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3211, 16.0571,  2.0641, -2.1701, -1.0192, -0.1237,  0.1021,  0.1264,
        -0.0724, -0.7104, -0.1761,  0.1071, -0.1701, -0.6891, -0.6280,  1.4151,
         0.0618,  0.0998,  0.0342, -0.4357, -0.2016, -0.7299,  0.1177, -0.3204,
        -0.0594, -0.0324, -0.8947,  0.4448,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2875e-01,  8.3054e+00, -1.2130e-01,  1.7811e-02,  1.3055e-01,
        -1.4742e-01, -4.1963e-01, -7.0140e-01, -3.0704e-01, -2.5904e-02,
        -2.3355e-01,  1.4155e-01, -2.6333e-02, -6.4409e-02,  3.6953e-02,
        -3.6506e-01, -1.5660e-03, -1.2385e-01, -7.9082e-02,  3.6184e-01,
         4.1903e-02, -2.9296e-03, -5.1797e-01,  3.2577e-01,  2.9168e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.1004, -3.2160,  0.0836, -0.0383,  0.1563,  0.0639,  0.2237,  0.1443,
         0.0592,  0.0658,  0.0096,  0.1898, -0.0127, -0.0632,  0.0353,  0.1196,
        -0.0264,  0.0447,  0.0261, -0.0056,  0.0382,  0.0985,  0.0804, -0.0837,
         0.2271, -0.0482,  0.3693,  0.0790,  0.0644, -0.1694, -0.2260,  0.0052,
        -0.0912, -0.0147, -0.0584, -0.0247, -0.0202, -0.0991, -0.0699, -0.0448,
        -0.0674,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1306,  6.6109, -0.4003, -0.3953, -0.3385, -0.2048,  0.1942, -0.2707,
        -0.0751,  0.0163,  0.0536, -0.2451,  0.0745,  0.0378,  0.1080,  0.0874,
         0.1337,  0.1129, -0.0856, -0.0499,  0.0212,  0.0595,  0.0430, -0.1459,
         0.1350,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1614e-02, -7.0443e+00, -2.8492e-01, -1.4729e-01, -4.5092e-02,
        -2.8457e-01,  3.6745e-03, -5.0190e-01, -2.7895e-01,  4.9445e-01,
        -2.2586e-02, -9.3778e-02, -6.1112e-02, -3.9526e-03,  1.5076e-01,
         2.5002e-01,  1.4661e-01,  4.5466e-01,  2.1244e-01,  3.5054e-01,
        -5.7961e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3916,  4.3098,  0.2587,  0.1343,  0.2390,  0.0601,  0.0542, -0.1847,
         0.0255,  0.0199, -0.0597,  0.0240, -0.0412, -0.0909, -0.2031, -0.0771,
        -0.0767, -0.3589,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4200,  4.2827,  0.9676,  0.2774,  0.0408, -0.4529,  0.2290, -0.0184,
        -0.0725,  0.0124, -0.0119, -0.0983,  0.0382,  0.1156,  0.1166, -0.0452,
        -0.1823, -0.0500, -0.0056,  0.1422, -0.0354, -0.0269,  0.0976, -0.0070,
        -0.1222,  0.1271,  0.0411, -0.3307, -0.0380,  0.2430,  0.0251,  0.0495,
        -0.0357, -0.0272,  0.1908,  0.0920, -0.3689, -0.1510,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2280, -7.5673,  1.2580,  0.9197, -0.1874,  0.1476,  0.4211, -0.1218,
         0.2723, -0.1357,  0.0993,  0.3274, -0.0554,  0.2389, -0.1545, -0.2460,
        -0.3614,  0.2240,  0.5563, -0.1154,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0903, -8.5304, -0.2545, -0.1858, -0.1431, -0.2989, -0.3269, -0.0723,
        -0.0473,  0.2687, -0.1831,  0.1813,  1.1425,  0.5513,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4395e-01,  1.4993e+01, -2.6039e+00, -2.1387e+00,  5.5413e-01,
         1.5823e+00,  3.2706e-01,  1.2427e+00,  1.0801e-01,  1.9061e+00,
         1.1299e+00,  3.3738e-02, -6.9358e-01,  2.4009e-04,  7.4544e-01,
         1.1547e-01,  5.7266e-01,  4.7031e-01, -1.3033e-01, -2.5209e-01,
         6.5613e-01,  1.1777e-01,  3.1074e-01, -6.2438e-01,  2.5102e-02,
        -3.8598e-01,  3.4120e-01,  5.2514e-01,  5.6631e-01,  3.1980e-02,
         2.5478e+00,  1.9397e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6531e-01,  2.1584e+01,  3.6916e+00,  1.8020e+00, -1.9512e-02,
        -6.6191e-02,  3.5653e-01,  6.6007e-02, -2.2766e-01,  9.1912e-01,
        -3.8277e-01, -4.5177e-01,  8.9235e-02,  2.2701e-01,  5.1891e-01,
         3.5817e-01,  7.7877e-03, -3.7531e-01,  4.2640e-01, -2.2227e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8089e-01,  8.4404e+00,  7.5873e-01, -1.7913e+00, -6.1452e-01,
         2.8802e-01, -1.1219e-01, -1.2006e-01,  6.8612e-02, -4.2458e-01,
        -3.0922e-01, -2.1716e-01, -4.1537e-02, -2.9190e-02,  4.5116e-02,
        -1.2004e-01,  3.0437e-02, -5.9365e-02, -3.3430e-01,  8.4091e-03,
        -3.6904e-01, -3.0163e-01,  6.4821e-02,  1.9792e-01, -1.3087e-01,
        -8.6184e-02,  2.5331e-02,  3.3286e-02,  5.8409e-02, -1.1183e-01,
        -3.4857e-01, -3.3782e-01,  1.4437e-02, -1.4794e-01, -3.1842e-01,
         1.5510e-03,  7.1163e-02, -1.1525e-01, -9.3255e-02,  7.4475e-02,
        -3.0009e-01, -4.8515e-02,  5.6524e-01,  9.8443e-02, -7.9879e-02,
        -6.3553e-01,  1.5776e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.4313, -15.8305,  -0.0812,  -0.2054,  -1.6775,  -0.1639,  -0.0784,
          0.4075,  -0.3978,  -0.0982,  -0.2328,  -0.3003,   0.0838,   0.7188,
          2.1862,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5645e-01, -2.1261e+01, -2.4085e+00,  4.2717e-01,  1.1841e-01,
         6.8790e-01, -1.4535e-01, -2.6862e-01, -8.0795e-01,  6.7737e-01,
        -1.5057e-01,  8.5605e-02, -3.0368e-01, -1.8322e-01,  3.8523e-01,
        -1.3904e-01, -1.1007e-02,  3.6844e-01,  3.0832e-01, -3.2643e-01,
        -2.0083e-01,  2.0818e-01,  1.3165e-01,  2.8113e-01, -3.0331e-02,
        -2.4343e-01, -4.2861e-01, -1.5581e+00, -5.8315e-02, -1.3470e-02,
         1.0776e-01,  3.0300e-01, -9.2740e-02, -1.9910e-02, -1.8212e-02,
         2.2648e-01,  6.5224e-02, -5.5022e-02,  4.6237e-01, -7.0147e-02,
        -1.2233e-01,  9.3159e-02,  1.2331e-01,  5.3737e-02,  8.1850e-02,
        -1.0572e-01,  8.3457e-01, -6.7924e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 7.6977e-01, -1.1055e+01, -1.2831e+00, -8.3878e-01,  2.9172e-03,
         5.8533e-02, -6.1460e-01, -5.9659e-01, -8.0715e-01, -3.1970e-01,
        -4.5827e-01, -5.5856e-01,  1.5919e-01, -3.3839e-01, -1.3111e-01,
        -2.9097e-02,  3.1163e-01, -1.0696e-01,  1.1934e-01, -3.5829e-01,
        -3.1511e-01,  4.2132e-02,  4.2391e-01,  1.2032e-01,  2.4923e-01,
        -1.4658e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.2278, -11.9098,  -0.2721,   0.2649,   0.3654,  -0.7357,   1.1408,
          0.2584,   0.2936,  -0.3969,  -0.7915,   0.3858,  -0.0621,   0.2944,
          0.1557,  -0.1166,  -0.2060,   1.0971,  -0.1599,   0.7704,   0.3297,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2104e-01,  5.1418e+00,  4.2364e-01, -3.4332e-01, -1.6653e-01,
        -1.5354e-01,  8.2547e-02, -1.2947e-01, -1.9631e-01, -2.4408e-02,
        -1.3935e-01, -6.3749e-02, -3.6901e-01, -3.4419e-01, -1.4978e-01,
        -7.0357e-02, -2.5984e-01, -3.5523e-02,  4.2023e-01,  3.2490e-01,
         1.6037e-01,  2.9506e-03,  4.8004e-01,  7.9037e-02,  2.1719e-02,
        -9.7336e-02, -3.6123e-02, -2.2231e-02, -5.0651e-02,  1.6418e-02,
        -1.4665e-01,  5.2615e-02,  9.4698e-02, -5.8382e-02, -3.0677e-01,
         6.6421e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2789,  6.5690, -0.0355,  0.0529,  0.2856, -0.1660, -0.3886,  0.1267,
         0.0922,  0.2020, -0.0344,  0.2042,  0.0885,  0.0649, -0.1587,  0.0487,
         0.0609,  0.0195, -0.5319, -0.2346,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7708e-01, -9.6226e+00, -6.4870e-01, -6.2119e-01, -1.4503e-01,
         2.6442e-02, -1.4362e-01, -7.6961e-01,  3.4663e-01,  1.3190e-03,
        -4.6103e-01,  2.1177e-01, -2.5151e-01, -2.7678e-01, -2.0734e-02,
        -4.5884e-01, -1.9260e-02, -5.7970e-02,  4.7109e-01,  3.7727e-01,
         4.9765e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7715, -8.8492, -0.9398, -0.0862, -0.2123,  0.0383,  0.6219,  0.6967,
        -0.3853,  0.0392,  0.2437,  0.5088, -0.3654,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2412e-01, -8.8948e+00, -8.6183e-01,  5.4468e-03, -1.6614e-01,
         2.0902e-01,  3.6967e-01, -2.9642e-01, -1.1878e-01, -3.1192e-01,
         2.5065e-02,  3.1359e-01,  1.0649e-01, -1.9664e-02, -2.5350e-01,
         2.0099e-01,  4.0961e-01,  5.0753e-02,  1.4805e-02,  9.5667e-03,
        -4.8160e-01,  4.6693e-01, -1.0989e-02,  2.4421e-01, -5.3004e-02,
         1.3603e-01,  9.3476e-02,  2.4494e-02, -4.5274e-02,  2.8810e-01,
         9.4584e-01, -1.1347e-01, -2.6636e-01, -6.2553e-02,  7.3034e-02,
        -2.2347e-01, -2.2862e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5222, -9.3123, -1.8154, -0.2232,  0.5838, -0.0202, -0.0265,  0.1079,
        -0.2096, -0.0760, -0.0709,  0.2580, -0.2139, -0.1512, -0.0765, -0.1240,
        -0.0100, -0.0167,  0.0208,  0.1175, -0.2923,  0.1776,  0.0782, -0.6687,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3613e-01,  5.2139e+00, -1.2759e-01,  9.2094e-01, -3.9531e-01,
        -1.0343e-01, -8.2609e-01,  7.0289e-01,  1.0167e-01, -4.5380e-01,
        -3.4236e-01, -6.9482e-01, -1.7131e-01,  7.7855e-02, -4.3023e-02,
        -2.6014e-01, -1.0567e+00, -6.1880e-01, -4.8649e-01, -3.4135e-01,
        -1.3898e-01, -5.7667e-01, -2.8865e-02,  1.4184e-01, -2.9825e-01,
         6.2811e-03,  3.3031e-02,  5.6907e-02,  1.9227e-03,  3.2273e-01,
         2.5611e-02, -6.2658e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7083, -7.5304,  0.0276, -0.1895,  0.0104,  0.3739, -0.0416,  0.1403,
        -0.2363, -0.2760,  0.1112, -0.0930,  0.0614,  0.1758,  0.0758,  0.0851,
         0.1369, -0.0869,  0.1769,  0.0577,  0.0871, -0.0145,  0.0553,  0.0608,
         0.1255,  0.3741,  0.3243, -0.1130,  0.0148,  0.4186, -0.2246,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6860e-01,  7.5631e+00,  1.2588e-01,  2.2909e-03,  6.6324e-01,
         2.1264e-01, -1.7142e-01, -3.4252e-01,  1.5679e-01, -1.8014e-02,
         1.9711e-01,  5.9113e-02,  7.3280e-03, -8.5522e-02,  1.1946e-01,
         1.1460e-01,  3.4267e-04, -2.1299e-02,  1.0730e-01, -3.2110e-01,
         4.5256e-01,  6.2848e-01, -2.2890e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0359e-01,  4.1567e+00,  9.7746e-02,  2.4048e-01,  7.4849e-02,
         3.6912e-01, -5.7669e-03,  1.9662e-01, -1.0681e-01, -6.5552e-02,
        -1.1669e-01,  1.5427e-01,  6.4599e-02, -8.2869e-02,  1.6798e-01,
        -3.3672e-02, -5.7623e-02, -2.0718e-02, -1.1263e-02,  8.1263e-02,
        -3.0611e-01,  1.6798e-01,  1.6278e-01, -3.0051e-03, -6.8031e-03,
        -3.7647e-02, -7.6095e-02,  4.8706e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.0806e+00,  9.9632e+00, -9.4374e-01,  4.6883e-01,  6.5985e-02,
         2.2550e-01,  1.2753e-01, -3.9747e-01, -2.3981e-01, -2.1184e-01,
        -4.1145e-01, -2.0989e-01,  2.2313e-01, -4.6801e-03, -1.9949e-02,
        -4.5721e-02, -2.3182e-01,  7.2188e-02, -2.5403e-01,  1.6462e-01,
         5.1057e-01,  7.2709e-03, -4.2684e-01, -1.0891e-01, -2.0342e-01,
        -2.6436e-01,  1.6833e-01, -1.5658e-01,  2.5353e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8980e-01, -4.4041e+00,  4.6758e-03, -3.6062e-02,  3.0589e-01,
        -6.4595e-02, -5.8958e-02,  3.1662e-02,  2.2478e-01, -5.0203e-02,
         5.3282e-02,  9.3978e-02,  2.4223e-01,  8.2087e-02,  8.2615e-02,
         4.3212e-03,  1.1259e-01,  1.4775e-01,  1.9997e-02,  1.6581e-01,
         1.7895e-01, -1.9157e-01,  3.3147e-01,  1.7633e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8674e-02,  3.8107e+00,  1.2187e-01, -3.6020e-01,  3.0602e-01,
         5.0823e-02,  2.0402e-01, -2.3195e-01,  1.9763e-01,  2.5242e-02,
        -2.9149e-01,  2.1241e-02,  1.9257e-01,  1.4892e-01, -8.2284e-02,
        -3.3708e-02,  4.2557e-02, -3.7828e-02,  7.4871e-02,  2.3517e-02,
        -1.8836e-01, -1.1471e-01, -2.1924e-03,  1.3106e-01, -1.6361e-02,
         7.8803e-02,  3.5572e-02, -4.5622e-02,  7.6891e-02,  6.1364e-02,
         1.0550e-01, -5.8845e-02,  1.4996e-01, -6.1062e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.7875, -10.5365,  -1.2674,   0.0223,   0.1477,  -0.0243,  -0.8468,
         -0.5883,   0.0222,  -0.3460,   0.1182,  -0.0962,   0.0221,   0.1596,
          0.0254,  -0.2631,  -0.4155,  -0.5259,   0.4076,  -0.2713,   0.2619,
         -1.1730,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0338, -4.3967,  0.2579, -0.1378,  0.3775, -0.4753, -0.1051,  0.1263,
        -0.1640,  0.1402,  0.2296,  0.0720, -0.0694, -0.0736,  0.1494,  0.1137,
        -0.4333, -0.0823,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.7539, -10.8054,  -0.7277,  -1.3085,  -0.9181,  -0.0243,  -0.5189,
         -0.1058,   0.2074,  -0.5651,  -0.1519,  -0.0419,   0.6193,   0.0882,
          0.2154,   0.3553,   0.4592,   0.0965,   0.2278,   0.1252,   0.1582,
         -0.5308,   1.2935,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4708e-01, -1.0069e+01, -6.1840e-01, -2.2178e+00, -2.7449e-01,
         3.0309e-01, -2.4322e-01, -3.5044e-01, -1.0185e-01, -2.6496e-02,
         1.3578e-02, -3.7598e-02, -1.7793e-01,  7.4787e-02,  3.5693e-04,
         2.0889e-01,  9.4390e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7859,  8.0537,  1.5033,  0.7590, -0.0706,  0.2844,  0.2848, -0.1740,
        -0.1811, -0.2117,  0.0138, -0.0808,  0.0776, -0.2319,  0.1563,  0.2579,
         0.5325,  1.1492,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5634, -5.4137, -0.1098,  0.2720, -0.1030, -0.4499, -0.1116, -0.2884,
         0.0429, -0.1369, -0.1266, -0.1808, -0.1359,  0.0141,  0.1445, -0.1516,
        -0.2422, -0.2822, -0.1172,  0.1028,  0.2856, -0.0346, -0.3173,  0.0413,
        -0.1097,  0.1763,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4228, -5.5604,  0.8269,  0.3833,  0.5203,  0.2506,  0.2242,  0.0468,
         0.2562,  0.0793, -0.0594, -0.0968, -0.1751,  0.1752,  0.0457, -0.1328,
        -0.1516, -0.0480,  0.1315,  0.2050,  0.0148, -0.0513, -0.0087,  0.0865,
        -0.1497, -0.1414,  0.0147, -0.0140, -0.2689,  0.1296,  0.6571,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8216e-01,  9.7497e+00,  7.7364e-01,  6.0456e-02, -1.6166e-01,
         3.1449e-01, -2.8770e-01,  9.1472e-01, -3.8521e-01,  2.0192e-03,
        -1.3581e-01,  3.3485e-02, -3.7033e-03,  1.9395e-02, -2.5577e-01,
         4.5222e-02, -3.4831e-01, -1.4093e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1894, -5.7758, -0.2208, -1.4154, -1.2083,  0.4374, -0.7335, -0.3607,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-6.7813e-01, -1.2096e+01, -9.6032e-01,  5.6027e-02, -1.0668e-01,
         4.3638e-01,  4.2608e-01,  3.6573e-02,  3.3641e-02,  1.3882e-01,
         2.0955e-01, -3.0974e-01, -9.5413e-02, -1.9066e-01, -4.3252e-01,
        -1.5925e-01,  6.1944e-02,  4.6613e-01,  1.2748e-01,  8.1644e-02,
        -7.9373e-01,  1.3979e+00, -3.7516e-01, -8.9769e-02,  9.9310e-02,
         2.1293e-01, -1.1141e-02, -1.5329e-01,  1.0924e-01, -6.5675e-02,
        -7.3717e-03, -3.0919e-02,  1.3395e-01,  1.7155e-01,  1.2832e-01,
         2.0892e-02,  1.9600e-01, -8.6940e-02,  5.9143e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1091, -9.3080,  1.3048,  1.1834, -0.0324, -0.1832, -0.2917, -0.4057,
        -0.3044,  0.0227, -0.2765, -0.5835,  0.0535, -0.2924,  0.6534, -0.0349,
         0.4047,  0.5777,  0.3907,  0.6125,  0.5103,  0.0650, -0.1993,  0.0851,
        -0.1406,  0.1384,  0.2823,  0.7497,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1129, -3.9916, -0.3676, -0.9172,  0.1591,  0.0174, -0.3671,  0.1756,
        -0.1117, -0.1450, -0.0481,  0.2375,  0.0412,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1045,  9.1350, -0.9330, -0.4049,  0.0903,  0.5344,  0.3286,  0.0476,
         0.0709, -0.2374, -0.3297, -0.6014,  0.0253, -0.3355, -0.7147,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6841e-01,  7.5869e+00, -1.9913e-02, -1.5290e+00,  8.7740e-03,
         6.2401e-01, -6.3499e-01,  1.2836e-01, -1.5330e-01, -1.6918e-01,
        -3.4015e-03,  2.0913e-01, -3.9943e-01, -3.1261e-01,  5.3897e-02,
        -4.3215e-02,  9.8859e-03, -2.4670e-01,  1.0522e-04,  1.1172e-01,
         1.9983e-01,  5.4568e-02, -1.9158e-04,  1.5360e-01, -5.0781e-01,
         1.3230e-01, -1.2556e-01, -7.5994e-02, -2.2573e-01,  6.4062e-02,
         1.5978e-01,  5.5560e-03,  2.9120e-01,  1.8980e-01,  3.8800e-01,
         8.5098e-02,  1.8756e-02,  7.8367e-02, -4.7583e-03,  6.2917e-02,
        -3.1262e-01, -9.4476e-03, -6.0642e-02,  1.9141e-01,  3.6736e-01,
         8.1309e-01, -4.9151e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6925e-01, -8.1273e+00, -2.0334e-02,  4.2340e-02, -4.5331e-01,
         8.3445e-01,  7.8459e-02, -4.3256e-02,  4.1630e-01,  1.3756e-01,
         1.5237e-01,  1.2565e-01, -1.0280e-02,  3.8455e-02,  2.2281e-02,
        -2.4548e-01, -3.8050e-02,  5.8298e-01, -2.5587e-02,  6.6934e-03,
        -4.4874e-02, -6.7866e-02,  4.5900e-02,  1.9368e-02,  2.6366e-01,
         2.2939e-01,  1.9104e-01,  3.3257e-02, -1.3556e-02, -6.0311e-02,
        -4.6134e-04,  3.8287e-02,  1.2375e-01, -2.8249e-02, -1.3584e-01,
        -1.3240e-01, -3.0306e-02,  7.6688e-03,  3.5955e-02,  3.8931e-02,
        -1.3548e-02, -1.3098e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0956, 19.8591,  1.8879, -0.3770, -0.9466, -1.2527, -0.2537, -1.2571,
        -0.1359, -0.6795,  1.3876, -0.4009, -0.3870,  0.4928,  0.1342,  0.3282,
        -0.9528,  0.3869,  0.2483, -0.1099, -0.8722, -0.4268, -0.0501, -0.9529,
        -1.7570,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5756e-01, -1.5624e+01, -1.7037e+00, -6.1332e-02,  1.8382e-01,
        -6.9111e-01,  8.8423e-01,  6.2011e-01,  2.6969e-01, -5.9202e-02,
         1.9298e-01, -4.9730e-02, -3.7217e-02,  2.2779e-01, -1.0240e-02,
        -3.5005e-01,  7.4802e-01, -9.4157e-02, -5.5006e-02,  9.7310e-02,
         1.2887e-01, -6.1140e-01, -4.3746e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -7.9884, -10.2094,   0.2337,  -0.1043,  -1.1667,   0.7199,   0.3965,
         -0.1339,   0.6673,  -1.1530,   0.3040,   0.6181,   0.6373,  -0.4934,
          0.2435,  -0.2153,   0.1514,   0.0559,   0.3187,  -0.1519,  -0.0898,
          0.1370,   0.1112,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8158,  7.0688, -0.5050,  0.3718, -0.7336, -0.1050,  0.3712,  0.5977,
         0.2989, -0.0729,  0.9244,  0.4690,  0.9823,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0688, -8.3145,  0.8238,  0.4018,  0.1637,  0.3450,  0.2597,  0.2599,
         0.4561,  0.3638,  0.7719,  1.8853,  1.0099,  0.2506,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3631e-01,  1.0126e+01,  8.4117e-01,  3.1138e-01,  4.3303e-01,
         3.0200e-01,  3.8593e-03, -1.3111e-02,  1.5568e-01,  2.4325e-01,
        -1.2163e-01,  8.6190e-03,  5.5706e-01,  2.9158e-01,  3.7310e-01,
         2.6782e-01, -5.4607e-01, -8.4702e-02,  1.0537e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.5352e+00, -5.5547e+00, -4.5571e-01,  2.1839e+00, -2.3313e-01,
         1.5701e-01, -1.8045e-01, -1.0085e-01,  1.3731e-01, -3.6797e-02,
         1.9487e-01, -1.0819e-01, -2.8217e-02,  7.8960e-02,  3.1518e-01,
        -7.3283e-02,  6.7152e-02,  4.3134e-01,  2.1731e-03, -7.8535e-02,
         7.7874e-02,  2.7651e-01, -3.7260e-02,  1.0773e-02, -3.6184e-01,
         6.9792e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.5449, -12.2590,  -0.7523,   0.4907,   0.5486,  -0.1276,   0.6498,
          0.6088,  -0.0176,  -0.1539,   0.3500,   0.1893,   0.7208,  -0.2608,
          0.8583,   0.0476,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0140,  6.9601,  0.3939,  0.8128,  0.2767,  0.4386,  0.2111, -0.0648,
        -0.0101,  0.1841,  0.1592, -0.2277,  0.1031, -0.0135,  0.2257,  0.2086,
         0.2408,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.1234, -10.9434,   2.1823,  -0.0775,  -0.8873,   0.7560,   0.3271,
          1.0009,  -1.0143,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4312,  5.1403, -0.4290, -0.6512, -0.1276, -0.0761,  0.1764,  0.3922,
         0.3778,  0.1985,  0.3198,  0.2298, -0.1370, -0.1295,  0.0913, -0.1257,
         0.0324, -0.1165, -0.1228, -0.1743,  0.0675, -0.0427,  0.0059, -0.1654,
        -0.0367, -0.0699, -0.1425,  0.0530,  0.1053,  0.0591,  0.0709,  0.0060,
         0.3066, -0.0847, -0.0181,  0.0162,  0.1027,  0.0754, -0.0447,  0.1346,
         0.0796,  0.3461, -0.0496, -0.0224, -0.1622,  0.3441], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2072, -9.3459,  0.6699, -0.0209, -0.1213, -0.0861, -0.1350,  0.6030,
         0.2443, -0.7457,  0.2137,  0.1355, -0.0346, -0.0522,  0.0176,  0.0780,
         0.0467,  0.3665,  0.1619, -0.1303, -0.0454,  0.0454, -0.0438,  0.1441,
         0.1500,  0.0107, -0.6168, -0.2608,  0.0954,  0.2632,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.9807, -12.0901,  -0.5242,   0.8339,  -0.2588,  -0.1096,  -0.5729,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5673e+00, -1.6215e+01, -7.2575e-02,  1.7109e-01,  3.1079e-02,
        -1.0223e-03,  1.2579e-03,  2.7885e-01, -3.8955e-01,  8.8003e-03,
         2.6440e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9251, 11.9456,  0.6405,  1.7769, -0.5515,  0.7984, -0.0975, -0.6498,
        -0.0167, -0.3285, -0.0461,  0.0270, -1.2024, -3.1501,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.1041, -12.3847,  -1.0235,   0.0493,  -0.3175,  -0.2624,   0.0164,
          0.4278,   0.0538,   0.0399,  -0.0242,   0.1287,   0.0306,   0.2030,
          0.0890,   0.3478,  -0.1621,   0.0423,  -0.0201,  -0.3724,  -0.1023,
         -0.0166,   0.1210,   0.0303,   0.1496,   0.2778,  -0.2148,   0.1020,
          0.1050,   0.4671,   0.5424,   0.1315,  -0.1987,   0.4402,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3030e-01, -1.1554e+01, -1.0980e+00, -2.2649e-02,  1.0264e-01,
        -2.5985e-01, -1.6365e-01,  1.9458e-01,  2.2057e-01, -2.6805e-01,
         6.8758e-02,  9.6029e-02, -1.1759e-02,  9.5917e-03,  5.0140e-01,
        -9.6406e-02,  1.3495e-01,  4.4321e-01, -5.9206e-02, -5.4943e-02,
         2.6070e-01, -2.8311e-01, -1.0419e-01, -1.0659e-01,  4.8069e-02,
         6.7843e-02,  1.6080e-01,  9.8002e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4685e+01,  2.1576e+01,  1.1465e+00, -9.0197e+00, -2.0800e+00,
         4.9737e-01,  6.6338e-01,  1.5989e-01,  1.5731e-01, -1.3053e+00,
         1.0464e-01, -1.2937e+00,  1.0189e+00,  1.3998e+00, -1.5755e+00,
         3.6962e-01, -2.8979e+00,  4.4992e-01, -3.8722e-01,  8.2596e-01,
         2.0799e-02, -3.1351e-01,  6.1709e+00, -1.1081e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 6.2119e-01, -7.9272e+00, -7.0517e-01, -7.4705e-01, -4.1459e-01,
         8.5444e-02,  2.6799e-01, -4.5962e-02, -2.7469e-04, -2.6484e-01,
        -1.7138e-01,  4.6818e-01, -2.5301e-01, -2.4752e-02, -3.3033e-02,
        -2.4195e-01, -8.0126e-02,  1.6475e-01,  4.8322e-01,  4.0244e-03,
         1.5380e-01,  7.2278e-02,  1.8093e-01,  2.0218e-01,  7.6821e-02,
        -7.7126e-02,  2.6946e-01, -4.3539e-02,  3.0857e-01,  9.8382e-02,
         1.0185e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.1201, -11.6021,   1.2258,   1.1661,   0.9776,   0.4172,  -0.2949,
          0.9270,   1.0823,   0.4476,   0.7745,   1.6080,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.1535, -13.6237,  -0.5252,   0.6346,  -0.3433,  -0.1311,   0.4235,
          0.1203,  -0.2679,   0.0963,  -0.2458,   0.1302,  -0.7705,   0.7349,
          1.1942,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0938, -3.6854, -0.0995,  0.1604, -0.2248, -0.1648,  0.4214,  0.0064,
        -0.1441, -0.0441, -0.0499,  0.0247, -0.0400,  0.1039,  0.0934, -0.0740,
        -0.0672,  0.0233,  0.0235,  0.0396, -0.0171,  0.0195, -0.1389, -0.0362,
        -0.1719,  0.0761,  0.0642,  0.0303,  0.0325,  0.0656,  0.0631, -0.0044,
        -0.0755], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.9717, -12.7686,  -0.2712,   0.5677,  -0.1655,   0.1353,   0.0386,
         -0.1432,   0.1143,  -0.3128,  -0.3163,  -0.0227,  -0.3189,   1.6226,
          0.4361,   0.6642,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4322,  7.0312,  0.3921, -2.4685, -0.5559, -0.1404,  0.4642,  0.6518,
         1.1865,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6910e+00,  4.8450e+00, -2.0197e-01, -1.0238e-03, -5.6712e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5320e-01,  4.4699e+00, -4.0869e-01, -1.4059e-01,  9.5909e-02,
         7.4994e-02,  1.1030e-01, -1.4129e-01, -3.1065e-01, -3.0813e-02,
         1.4620e-01,  2.0932e-01, -2.4245e-01,  7.8506e-01,  1.0668e-01,
         1.7236e-01,  1.8903e-01,  7.4966e-02,  2.9880e-01,  1.7040e-01,
         3.4189e-02,  1.6897e-01,  1.6047e-01, -1.7570e-01,  4.3871e-03,
         2.4425e-01,  4.0180e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2993,  6.0367, -0.1138,  0.3977, -0.2691, -0.1541,  0.0967,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7090e-02,  1.4272e+01, -9.5388e-01,  4.1206e-02,  2.3001e-01,
         7.8130e-01,  8.3630e-02,  2.3353e-01,  9.3273e-02, -3.5117e-01,
         3.1864e-02,  7.1929e-01, -6.8828e-02,  1.0616e-02, -3.3068e-01,
        -3.1063e-01,  4.7737e-01, -6.6857e-01, -2.0350e-01,  5.1487e-01,
        -1.5642e+00, -9.9165e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6677, 10.3062,  0.4437,  0.0739, -2.2864, -0.0694, -0.5104, -0.0700,
         0.0142, -1.0066, -0.3337, -0.3732, -0.3508,  0.1148, -0.2803,  0.1692,
        -0.1224, -0.6592,  0.1408,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7181, 14.9726,  0.2019, -0.1358, -0.0941, -1.0948,  0.2695,  0.0940,
         0.1538, -0.1465, -0.1937,  0.1128, -0.1021, -0.2900, -0.0332, -0.1541,
        -0.8294, -0.1128, -0.0931,  0.2377,  0.3533,  0.2779,  0.0359,  0.1994,
        -0.0630,  0.0936,  0.0169, -0.1399, -0.0464,  0.1467, -1.1864,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-3.9209e+00,  1.1986e+01,  1.0741e+00, -4.1116e-01,  1.8008e-01,
        -1.5649e-01,  2.4930e-02, -2.7100e-02, -1.9123e-01, -2.0012e-01,
        -2.3029e-03,  4.1885e-01, -1.8481e-02,  4.3783e-01,  3.8986e-03,
         8.5400e-01,  1.7093e-01,  1.0649e-01, -6.2312e-01, -8.4533e-02,
        -1.9624e-01, -2.0304e-02, -3.9211e-01, -2.6095e-01,  7.3693e-01,
         2.7779e-01,  3.9023e-01, -1.9240e+00, -1.5902e-01,  7.5052e-01,
        -4.0029e-01,  6.5503e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2649, 10.4597,  2.0713,  0.0532, -0.1619, -0.1875,  0.0469, -0.1683,
         0.6641,  0.1798,  0.4916,  0.1293,  0.1365, -0.3165,  0.1187, -0.3167,
        -0.4999,  0.1946, -0.1168,  0.1324,  0.6309,  0.3353,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2616e-02,  6.8443e+00, -1.4323e-01, -5.0186e-01, -7.0242e-02,
         3.7627e-02, -3.2966e-01,  2.6143e-01,  1.6840e-01, -1.0241e-01,
         3.9606e-02,  1.4133e-01,  1.0099e-01,  1.4198e-02, -9.0371e-04,
        -1.1601e-01,  1.0542e-01,  5.9737e-02, -1.0984e-01,  2.1807e-02,
         6.5348e-02, -1.7739e-01, -1.1973e-01, -2.7212e-02, -1.5341e-02,
        -6.7953e-02,  9.0571e-02,  1.1299e-01,  6.5006e-02, -1.7020e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4383,  4.9463,  0.2253, -0.0229,  0.1411,  0.0552,  0.1338,  0.0337,
        -0.0609, -0.2027,  0.0552,  0.0585,  0.1771, -0.0880,  0.0447, -0.1866,
        -0.0850,  0.2863,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3360e-01,  8.9977e+00,  1.8731e+00,  6.2515e-01, -3.5585e-01,
         7.1930e-01,  4.4061e-01, -1.7442e-01,  1.0583e-01, -1.0409e-01,
        -2.5096e-01, -3.5390e-02, -6.2967e-02, -2.1602e-01,  5.0883e-02,
        -8.7400e-03, -1.5626e-01, -4.8733e-01,  1.4087e-01, -1.7439e-01,
        -2.0636e-01, -2.3379e-01,  1.4457e-01,  1.2003e-01,  9.1585e-02,
         1.4233e-02, -1.0756e-01, -5.8136e-03,  1.1068e-01,  6.8218e-02,
         2.7621e-02, -1.0234e-02, -3.0133e-02, -1.7631e-01,  1.0442e-01,
        -2.7361e-01,  7.3797e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6694e-01, -1.1398e+01, -3.7349e-01, -7.3402e-01,  1.6376e-01,
         1.1131e-01,  4.2893e-02,  4.7204e-01,  2.8377e-02,  1.2075e-03,
        -2.6483e-01,  3.5758e-01,  6.3618e-01, -2.1788e-01,  1.6491e-01,
         1.2306e-02,  1.8463e-01,  1.1634e-01,  1.0539e-01,  3.5157e-01,
         2.2273e-01, -6.2497e-01, -5.6451e-01, -4.4162e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5534e-02,  6.1969e+00,  6.9214e-01,  3.9273e-01,  8.5583e-01,
         1.5233e-01, -5.4087e-03,  1.5380e-01, -2.3177e-02,  7.1934e-02,
        -1.6207e-01, -2.4952e-01, -1.1002e-01, -7.1571e-02,  1.9836e-01,
        -5.7116e-02, -7.3514e-02, -3.2118e-02,  7.5075e-03,  1.2917e-02,
        -5.6441e-02, -1.6902e-01, -1.0268e-01,  1.3087e-01, -7.2250e-02,
         1.6580e-01, -4.3953e-02,  7.8900e-02, -1.0497e-01,  5.0352e-03,
         3.1385e-01, -7.1147e-02, -1.1864e-01,  2.1293e-01,  1.5308e-01,
        -2.6797e-01, -2.4962e-01, -9.7364e-02, -1.0109e-01, -3.5990e-02,
        -1.1538e-01, -1.0418e-01, -4.9119e-02,  8.5209e-03, -4.3609e-01,
        -9.8868e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3872,  8.9842,  2.3583,  0.4031, -0.7094,  0.0622,  0.0711,  0.0691,
        -0.1652,  0.3197, -0.3626,  0.1166,  0.7913, -0.2815, -0.3205,  0.0370,
        -0.2765, -0.2046, -0.4170,  0.0237,  0.4281, -0.7722, -0.9209,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2170, -9.5118, -0.8151, -1.0958, -2.3287,  0.1432, -0.3012,  1.4986,
         0.8407,  0.9756,  0.0710,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0118, -9.7460, -1.7646,  0.3272,  0.2884, -0.0718, -0.3499, -0.1310,
        -0.1636, -0.1453, -0.2986,  0.1319,  0.6046,  0.5631,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8764,  8.5231,  0.7560, -0.0118,  0.0590, -0.0788, -0.3382, -0.1067,
        -0.1849, -1.3531,  0.4338,  0.1881,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4342e-01,  5.5549e+00,  4.4437e-01, -4.3297e-01,  6.1383e-02,
        -2.4262e-02, -6.5904e-02,  2.2766e-01,  2.6427e-01, -2.6520e-02,
        -4.9574e-02, -2.3924e-01, -3.9922e-02, -7.6548e-02, -6.6710e-02,
        -1.1049e-01, -9.3252e-02, -1.0815e-01, -1.2665e-01, -3.6749e-01,
        -8.5408e-02, -6.0605e-02, -9.1041e-02,  1.3088e-01,  1.2554e-01,
         4.3759e-03, -1.0301e-02, -6.3849e-02,  5.7076e-02, -8.2501e-02,
         4.9861e-02, -7.5163e-02, -1.6277e-02,  3.3138e-02, -1.1590e-01,
        -7.9418e-02, -2.9191e-02,  1.8173e-03, -7.5319e-02, -1.1825e-01,
        -2.3281e-02, -1.4038e-02,  1.6175e-01, -1.4495e-02, -7.9941e-02,
         1.3294e-01, -1.1385e-02, -9.4497e-02, -5.5095e-02, -1.2225e-01,
         1.6206e-02,  7.1800e-02, -5.1394e-02,  2.3889e-02,  2.6433e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ -0.1035, -19.0417,  -2.2129,   0.4196,   0.7784,   0.5422,  -0.0639,
          0.2944,  -0.3494,  -0.4459,  -0.1819,   0.0713,  -0.1994,  -0.3130,
         -0.5518,   0.2079,   0.5273,   0.2547,  -0.0226,  -0.2895,   0.1135,
         -0.1376,   0.7897,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2964, -6.7615, -0.6491,  0.1599,  0.1694, -0.1931,  0.1303, -0.4620,
        -0.0780,  0.1160,  0.2538, -0.0279,  0.3271,  0.5088,  0.2622,  0.2618,
         0.3788, -0.1803,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9037,  3.7382,  0.1583, -0.1818,  0.0367,  0.2203,  0.0495, -0.1142,
        -0.1706, -0.1572, -0.1196, -0.0829,  0.1134, -0.0399, -0.1301, -0.0832,
        -0.0228,  0.0531,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0610, -6.6413,  0.0816, -0.0179,  0.0309,  0.2069,  0.4916,  1.2799,
         0.5029,  0.1400,  0.3039,  0.5250, -0.3083,  0.5554,  0.8265,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2643e-01,  1.4790e+01,  1.2320e+00,  5.7289e-01,  3.5741e-01,
         8.8342e-02,  6.8178e-01, -5.2657e-01,  1.7317e-01,  2.4756e-01,
         3.8434e-01,  1.3690e-02,  2.7966e-01,  4.0198e-01, -2.2673e-01,
         2.7411e+00, -3.5299e-01, -2.9375e-01,  1.5670e-02, -9.2350e-02,
         4.7832e-02, -4.2575e-01,  1.5356e-01, -4.1454e-02, -1.9127e-01,
         1.6617e-01,  1.0442e-01, -2.8763e-01,  4.4731e-03,  6.6016e-01,
         2.8820e-01,  6.9219e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5592e-01,  6.6357e+00,  1.2402e+00, -6.2996e-01, -2.8195e-01,
         4.9589e-02, -1.0940e-01, -2.6445e-01, -2.9103e-01,  7.2611e-02,
        -1.9264e-01, -3.1384e-01,  3.9210e-01,  4.0548e-01,  2.7943e-02,
         3.1389e-02,  4.3072e-02, -4.9049e-01, -1.4546e-01,  1.5119e-01,
        -6.5696e-01, -1.1942e-01, -5.6238e-01, -1.3849e-01, -1.4228e-01,
        -6.7741e-02, -2.3445e-01,  3.3700e-01,  4.3445e-03, -3.3378e-01,
        -2.8461e-02,  7.8376e-02,  7.4522e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3753e-01,  1.0328e+01,  1.0488e+00,  1.1362e+00,  2.0225e-01,
         4.4729e-02, -7.2345e-02,  1.3730e-01, -8.5559e-01, -1.2071e+00,
        -7.9638e-02,  3.8753e-01, -1.4308e-01, -2.7044e-01, -6.2988e-02,
         3.4058e-01,  1.2845e-02, -5.9671e-03, -1.2669e-01, -9.3524e-02,
        -4.6782e-02,  5.4727e-03, -5.6514e-02, -3.5625e-01,  3.8994e-02,
        -4.1162e-02,  1.1702e-01,  2.0475e-01,  7.0291e-02,  1.6690e-03,
         3.1089e-02, -5.4957e-02, -1.4894e-02, -6.1526e-02,  3.7161e-02,
        -2.1750e-01, -5.9767e-02,  1.9538e-02,  1.4606e-02,  3.3652e-02,
        -1.3086e-02, -3.0631e-02,  1.2641e-02, -1.2508e-01,  4.3596e-02,
         1.7087e-01, -2.6552e-02, -2.1017e-01,  9.9355e-02, -7.3996e-02,
        -1.1266e-01,  2.8859e-02,  1.2343e-01,  3.0821e-02, -2.8707e-02,
        -4.9336e-03,  1.1385e-01,  1.9993e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6072, -7.0901, -0.2689,  0.3655,  0.1800,  0.1047,  0.2232,  0.1939,
         0.1210, -0.0084,  0.5244,  0.0917,  0.0286,  0.3737, -0.1200, -0.1005,
        -0.1926,  0.1344,  0.0293,  0.1297, -0.0204, -0.2792,  0.2554, -0.1376,
         0.7427,  0.5906,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0180e-01, -8.6500e+00, -3.4928e-01, -7.3255e-01, -1.0790e-01,
        -1.6924e-01,  3.3565e-01,  7.5553e-02,  4.2155e-01,  4.7893e-01,
        -1.8956e-01,  1.0184e-03, -2.2440e-01, -4.1236e-02, -1.0067e-01,
         1.6440e-02,  2.6785e-01, -2.8135e-02,  1.1257e-02, -8.5852e-02,
        -6.4532e-02,  2.2949e-01,  6.4822e-01,  1.5230e-02,  1.2716e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9572e+00, -8.7012e+00, -8.3518e-01, -1.4124e-01,  8.1259e-01,
        -4.7213e-02,  7.4674e-02,  7.3916e-01,  1.6807e-01,  1.5598e-01,
         2.1087e-01,  6.1700e-02, -4.0644e-02, -7.4969e-01, -4.3140e-01,
         1.5538e-01, -5.6268e-02, -1.5037e-01, -2.2342e-02, -4.5599e-02,
         5.9478e-02,  8.6438e-02,  1.2373e-01,  1.7248e-02,  1.3142e-01,
         2.4890e-01,  1.1834e-02, -1.1137e-01,  6.5997e-02,  7.5230e-02,
         3.5384e-03,  3.3711e-02,  2.6016e-01,  7.3476e-02,  8.0708e-02,
         2.0328e-03,  1.0304e-01, -2.0682e-01,  8.7464e-02,  4.9059e-02,
         9.4846e-02, -3.8673e-02, -1.1625e-01,  5.1526e-01, -3.1410e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2411,  4.5842,  0.2027,  0.3725,  0.4391, -0.2027,  0.0525, -0.0381,
        -0.3021, -0.0238, -0.1169, -0.1336, -0.0493, -0.1014,  0.0817,  0.1364,
        -0.1866, -0.0457, -0.1173,  0.1487,  0.2686, -0.2845, -0.0612,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1657,  9.7006, -1.2729, -0.2002,  0.0400,  0.2907,  0.8190, -0.0216,
        -0.5107,  0.0105, -0.2644,  0.2750,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 2.5874e-01, -1.0793e+01, -2.3778e+00, -6.1597e-01, -9.3741e-02,
        -1.0736e+00, -5.3601e-01,  6.1888e-03, -5.6339e-01,  3.5310e-01,
         2.1935e-02, -2.3141e-01, -5.9469e-02, -6.3347e-02,  9.5907e-02,
        -4.8794e-01, -1.8015e-02,  1.9611e-01,  1.1369e-01,  1.2463e-01,
         1.5757e-01,  1.5195e-01,  7.0422e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5833e+00,  9.1210e+00,  5.7715e-01, -1.4923e-01, -1.1809e+00,
         1.1422e-01,  5.4202e-01, -1.4539e-01,  3.9267e-01,  7.4360e-02,
        -5.8897e-02,  1.3759e-02, -6.9705e-02,  3.5985e-01, -2.9161e-01,
        -9.8495e-02,  2.6670e-01, -6.6908e-01,  1.9733e-01,  4.8991e-03,
         2.7382e-01,  6.9775e-02,  2.1627e-01, -1.9722e-01,  7.6938e-02,
         1.2625e-01, -2.8713e-02,  1.6506e-01,  7.2812e-02,  2.0684e-01,
         5.7953e-02,  5.3960e-01,  1.3124e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7890e+00, -9.3963e+00, -1.6473e-01,  8.8277e-03, -6.2787e-01,
        -3.6459e-01,  3.2399e-02, -2.2679e-01, -6.6881e-01, -9.9201e-02,
        -4.8240e-01,  1.9583e-01,  3.3539e-01, -1.8417e-01, -5.5287e-01,
         8.4444e-01,  1.6807e-01,  4.0630e-01,  2.2182e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8330, -5.0022, -0.0152,  0.0091, -0.0553, -0.2626,  0.2241,  0.1334,
         0.0374,  0.0053,  0.1817, -0.3779,  0.1112,  0.0516,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4135e-01,  1.3809e+01, -3.6924e-01, -3.6832e-02,  4.0972e-01,
         1.2209e-01, -4.1899e-01,  1.1446e-01, -2.2396e-01, -8.3527e-02,
         4.3845e-01, -7.7510e-03,  3.9180e-01, -8.5708e-01, -1.7337e-01,
         1.1441e-01,  4.8033e-01,  3.4193e-01,  5.1342e-02,  2.9168e-01,
         7.4860e-01,  5.8629e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2927,  9.6620,  0.1696,  0.0532, -0.2147,  0.0713,  0.3309, -0.2889,
         0.4363,  0.1493, -0.1925, -0.4152, -0.2907, -0.0906, -0.0137, -0.0349,
        -0.3795, -0.0301,  0.0867, -0.2627, -0.0221,  0.3383, -0.1477, -0.0876,
        -0.1330, -0.1544, -0.1666,  0.0673,  0.1859,  0.0950,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0134e-01,  4.9769e+00,  5.7563e-01,  6.8550e-01,  2.4666e-01,
         1.0072e+00,  1.8215e-03, -2.0179e-01,  3.7626e-01,  1.7765e-01,
         3.5399e-02, -3.3057e-01, -1.1265e-01, -2.5688e-01,  7.8170e-02,
        -2.4467e-01, -1.3277e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6342e+00, -6.7605e+00, -5.1640e-01, -2.2203e-01, -4.7590e-03,
         1.6394e-01,  4.9000e-01, -1.3472e-02,  1.4141e-01,  3.3802e-01,
        -4.0020e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6656e-01,  1.6416e+01,  1.6691e+00, -2.4733e-01,  1.1561e-03,
        -1.8391e+00,  4.5996e-01,  8.7338e-02,  1.3743e-01,  4.0003e-01,
        -2.1861e-01, -2.4183e-01, -1.8024e-01,  6.2026e-01,  6.2673e-01,
        -2.6683e-01,  1.3922e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5734, 10.6916,  0.2803,  0.2421,  0.0159,  0.0925, -0.1170,  0.0615,
         0.1009, -0.2292, -0.2195,  0.1078, -0.3105,  0.0489, -1.0333, -0.0653,
         0.0941, -0.2292, -0.5115, -0.0161, -0.1468, -0.4037,  0.0840, -0.2115,
        -0.0561, -1.1540,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9370, 11.0609,  0.1954, -0.5117,  0.2019, -0.6064, -0.9202,  0.2792,
         0.5365, -0.0949,  0.3856, -0.0365, -0.2479, -0.0224, -0.0182,  0.0422,
        -0.1834,  0.3353, -0.2868, -0.0584, -0.3217, -0.3467, -0.0143,  0.1991,
        -0.5941, -0.2731, -0.0820, -0.1058, -0.6693, -0.9151,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7635e-01, -7.5897e+00, -3.8336e-01,  1.9062e-01, -7.0163e-01,
         6.1035e-02, -3.0557e-03,  2.8879e-01,  6.3199e-02,  1.0472e-01,
         1.8976e-01, -4.2875e-02,  2.6146e-01, -1.8771e-01,  2.5278e-01,
        -1.4201e-01,  3.9182e-01, -1.7323e-01, -5.5116e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 5.3758e-02,  1.0758e+01,  1.2503e+00,  1.1936e+00, -1.3325e-01,
        -2.2274e-02,  1.6901e+00,  1.4139e-01,  4.9428e-01,  8.5739e-02,
        -3.6153e-02, -4.9780e-02,  1.8903e-01,  2.3716e-01,  7.8183e-03,
        -3.4161e-01,  1.5815e-01,  3.2838e-01,  9.2675e-03,  1.2697e-01,
        -6.1747e-02, -7.4140e-02,  7.1254e-02,  4.8384e-02, -3.1673e-01,
         2.7542e-01,  4.5055e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1036,  7.9884,  0.3291, -0.8154, -0.7338, -0.0468,  0.0880,  0.0831,
        -0.0862, -0.1924, -0.0945, -0.4499,  1.2963, -1.1085,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5525,  4.8588,  0.1933,  0.2096,  0.0774, -0.0707, -0.0877, -0.0663,
         0.0742,  0.0315, -0.3642,  0.0863, -0.1731,  0.0634, -0.2182,  0.0168,
        -0.1126, -0.0883, -0.0892,  0.0871, -0.0797, -0.0424, -0.1730, -0.2350,
        -0.2128, -0.6277,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9381, -3.7843, -0.3744,  0.4203, -0.0444,  0.0157,  0.0793,  0.0489,
         0.3127, -0.0617,  0.1343,  0.0214, -0.1924, -0.4015,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6578e-01,  6.5473e+00,  6.6966e-02,  1.9512e-02,  1.1425e-01,
        -3.7672e-01,  1.3424e-02, -1.8084e-01,  4.4129e-02, -1.1863e-02,
         6.8915e-02, -2.7968e-01, -1.3747e-01, -3.7633e-01,  2.6928e-01,
        -1.7890e-01, -3.0507e-01,  1.5997e-01, -2.5608e-02, -2.6015e-02,
        -1.3016e-01, -1.3722e-01, -1.7294e-01,  6.7123e-01, -3.4485e-02,
        -6.0707e-03, -1.3773e-01, -1.7622e-01,  6.3812e-02, -2.9835e-01,
         5.3941e-01,  7.6258e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3259,  8.6437,  0.7369, -0.2009,  0.0582,  0.1402, -0.1932, -0.0746,
         0.1513, -0.1158,  0.2134, -0.0293, -0.2257,  0.0294,  0.0546,  0.0375,
        -0.2958, -0.1559, -0.2428, -0.1026, -0.1198, -0.1813,  0.0451,  0.1283,
        -0.4993,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5703,  4.8074, -0.1994, -0.3209, -0.1427, -0.2875,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.8959e-01,  6.6973e+00, -4.0042e-02,  2.2580e-02, -2.3482e-01,
         3.0254e-01,  1.8836e-01, -2.9574e-02,  8.7625e-02, -1.3068e-01,
         1.0333e-01,  3.9091e-01,  3.0613e-01, -7.0063e-02,  8.1874e-03,
        -1.0561e-01, -4.1963e-01,  1.0183e-01,  1.6648e-02, -5.8753e-02,
        -9.1662e-02, -5.7364e-03,  2.1636e-01, -6.6888e-02, -1.3033e-02,
        -8.2355e-03,  7.0009e-02, -1.4323e-03, -1.4102e-02, -1.1903e-01,
         2.1160e-01,  1.2433e-01,  5.7391e-02,  1.0826e-01,  1.2396e-01,
         3.0314e-01,  2.0610e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0539e-01, -6.5501e+00,  3.3259e-01, -1.2952e+00,  4.2059e-01,
        -4.0090e-01, -2.6409e-01,  2.6980e-01, -3.1623e-02, -5.5086e-02,
        -1.0158e-01, -2.7384e-01, -7.3305e-02,  2.7798e-02, -4.3962e-01,
        -4.2362e-01,  1.5235e-01,  1.6914e-01,  9.9127e-02,  3.5976e-03,
         3.8291e-01,  1.4630e-01,  8.6271e-02,  8.5596e-01, -5.0087e-02,
         1.5433e-01, -2.7380e-01,  2.0591e-01,  2.6535e-02, -6.2266e-03,
        -1.1379e-01,  1.7245e-01, -2.6630e-01,  2.0619e-01, -1.0183e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.5248, -14.2846,   5.6757,  -0.1139,  -0.0870,  -0.2818,   0.1530,
         -0.0684,   0.0303,  -0.2188,  -0.0662,   0.2289,   0.3719,  -0.6774,
         -0.5693,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4554, 10.1963,  1.9815, -0.7214, -0.3795,  0.2608,  0.1374, -0.3771,
         0.4727,  0.5474, -0.1035, -0.0913,  0.0116,  0.0888,  0.2213,  0.3520,
        -0.0130,  0.6561,  0.0450,  0.1066,  0.0154, -0.1412, -0.1598, -0.0465,
        -0.0298, -0.1502,  0.0906,  0.0327, -0.1123, -0.2299,  0.0956,  0.5000,
         0.5068,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7428, 19.3144,  1.5153,  0.2623,  0.2069,  0.1803, -0.0519,  0.0875,
        -0.2608, -0.1276, -0.3402, -0.3776, -0.1755, -0.0982, -0.8583, -0.3346,
        -0.0206,  0.0233,  0.0782, -0.5753, -0.7764, -0.1269,  0.0475, -0.1730,
        -0.2200, -0.1174, -0.5708, -0.6987, -2.0530,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.9909,  6.0355, -0.3413, -0.3500,  0.2087,  0.0765, -0.1577, -0.1790,
        -0.0485, -0.2893, -0.1101, -0.1060, -0.0672,  0.0193, -0.8056,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1559e-01,  7.2378e+00, -9.1766e-02, -4.5363e-01,  3.8977e-01,
        -3.8249e-02,  2.1454e-01,  8.3509e-02, -1.9719e-01, -2.4620e-01,
        -5.5181e-02, -1.0712e-01, -1.1952e-01,  6.0975e-02,  3.7605e-02,
         7.3784e-02,  2.8839e-02, -1.5081e-02,  3.3683e-02, -8.1116e-02,
         1.1121e-01, -9.1487e-02, -5.8297e-02, -7.8694e-02, -4.5516e-02,
        -1.9404e-02,  5.1266e-02, -1.7546e-01, -4.2867e-02,  3.2887e-02,
         2.9753e-02, -1.6484e-02,  1.5085e-02, -5.5271e-02, -2.3573e-02,
        -3.1093e-02,  6.5434e-02,  3.7685e-02,  7.8871e-02, -3.4830e-02,
         3.1038e-03,  2.7770e-02,  6.0860e-02, -7.2130e-02,  2.8534e-02,
         1.2835e-01,  2.1431e-02, -9.9415e-02,  4.0693e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0604e-01, -2.2992e+01, -8.2976e-01,  2.7504e+00,  1.3475e+00,
        -1.5134e+00,  5.7665e+00,  2.3937e+00,  3.5433e+00,  2.8436e+00,
         3.2880e-01, -3.9625e-01, -5.1730e-01,  1.4117e-01,  6.4930e-01,
         4.6350e-01,  1.3009e-02,  1.3034e-01,  1.2807e-01, -1.3797e-01,
         3.4938e-01,  8.6324e-01,  2.3907e-01,  5.1427e-01,  2.4363e-01,
        -9.3328e-02,  6.2856e-02, -9.9459e-01, -1.7481e-01,  5.1106e-01,
        -6.8080e-01, -2.8629e-01,  4.5331e-02, -2.9664e+00, -1.5782e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0603,  8.2437,  0.4679, -0.0113,  0.2600,  0.0880, -0.2853,  0.1939,
        -0.2218,  0.1460, -0.5567, -0.0953,  0.0306,  0.2407, -0.0591,  0.2584,
         0.0488, -0.0387,  0.1850,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4918e+00,  1.0850e+01, -4.5713e-01,  6.1914e-01, -5.8086e-03,
         3.3688e-01, -4.0038e-02, -2.2934e-01,  6.8715e-01, -3.4508e-01,
         8.2559e-02, -2.5337e-02,  2.3682e-01, -1.5662e+00, -1.5524e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6364, -6.3821,  0.3158,  0.0422,  0.2628,  0.2418,  0.0414,  0.1912,
        -0.0862,  0.1761,  0.2357,  0.2991,  0.0780,  0.1075,  0.2519,  0.2250,
         0.0209,  0.2495,  0.0098,  0.0076,  0.5306, -0.0985,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7375e-03,  4.9630e+00,  4.0803e-01,  2.7681e-01,  2.3934e-01,
        -8.7427e-03, -4.0942e-01,  3.3961e-02, -5.5646e-02,  8.3094e-02,
        -9.3319e-02,  9.2813e-02, -4.7027e-03, -3.6113e-01, -2.2868e-01,
        -6.5356e-02, -3.5881e-02, -1.1733e-01,  9.9786e-02,  3.7601e-02,
         1.2100e-01,  9.3102e-02, -1.2344e-01,  3.6894e-02, -4.3376e-01,
        -6.4420e-02, -6.3390e-02,  1.6358e-02,  6.0198e-02,  6.7843e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([20.9992, 17.1259, -5.1914, -1.0642,  1.9391,  0.9305, -5.4892, -6.9559,
        -0.1460,  2.8683, -4.9152,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9469e+00, -1.6900e+01,  6.1166e-01,  1.3200e+00,  4.6612e-01,
         6.8474e-01, -8.6175e-01, -7.9645e-02, -1.4954e-01,  2.6702e-02,
        -1.6483e-01, -3.6077e-01,  6.8628e-01, -1.9365e-01,  6.5692e-02,
        -7.8759e-03,  2.4987e-01,  3.0886e-01,  5.1649e-01, -2.0979e-01,
         5.4124e-01,  5.3564e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3466e+00, -1.6652e+01, -2.8927e+00,  1.8796e-01, -1.3505e-01,
        -1.2092e+00, -3.8363e-01,  4.4876e-03,  5.5238e-03,  1.0509e-01,
         3.6696e-01,  8.9294e-01,  6.2804e-02,  3.3826e-01, -1.2772e-02,
         2.1414e-01,  1.5422e-02, -2.0093e-01, -7.9730e-02,  2.0696e-02,
        -9.0085e-02,  1.4277e-01,  4.5193e-01,  3.7508e-01,  1.3277e-01,
         1.1980e-01,  1.2185e-02,  2.5034e-01,  9.2784e-01,  3.4368e-01,
         3.4346e-02, -4.3545e-01, -6.6409e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6223e-01, -7.9300e+00, -7.2960e-01,  6.9349e-01,  3.1519e-01,
        -1.4525e-01, -1.5861e-01,  1.7950e-01,  9.8654e-04,  5.1001e-02,
         1.1649e-01,  6.6122e-02,  2.9856e-01,  3.6752e-01,  2.3498e-02,
        -3.8724e-01,  1.1661e-01,  1.1958e-01,  3.7995e-02,  7.8393e-02,
         1.8337e-01,  1.1850e-01, -3.2269e-02,  2.1100e-02,  4.4790e-02,
        -3.2206e-02,  1.1908e-01,  3.4386e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4176e-01, -8.6853e+00, -1.7104e+00, -4.0375e-02,  6.8320e-02,
         1.6895e-01,  7.9934e-02,  3.5031e-01, -4.1829e-01, -3.3197e-02,
         1.3979e-01,  2.2323e-03,  2.5116e-01,  1.3237e-01,  1.5317e-01,
        -3.6912e-02, -7.0175e-02,  7.6950e-02,  1.4726e-01,  2.2669e-02,
         3.2713e-01, -7.3352e-02, -2.7947e-01,  5.7319e-02,  6.9081e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.2778e-01, -6.8763e+00, -2.7955e-01,  2.4329e-01,  3.4526e-01,
         1.6162e-01,  1.6138e-01,  1.1113e-01,  1.4986e-01,  1.1382e-01,
         1.3647e-01,  1.4929e-01,  4.9274e-02,  3.8083e-01, -5.7688e-03,
         1.3748e-01,  1.1536e-01, -9.5128e-02, -7.4589e-02, -9.3502e-03,
        -2.3188e-02,  1.3379e-01,  7.9111e-02,  2.0472e-02, -1.8461e-01,
         2.5126e-02,  1.0254e-01,  5.5256e-02,  6.0479e-02,  1.1405e-01,
         1.1465e-01,  8.3596e-02, -3.2464e-02,  5.0483e-02,  5.3131e-02,
         9.7520e-02,  9.6362e-02,  3.2091e-02,  1.0212e-01, -1.2641e-01,
        -1.8090e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0621e-01, -1.6925e+01, -2.5718e+00,  4.2755e-01, -5.9052e-01,
         1.8226e-01, -1.1748e-01, -1.4051e-02,  2.4445e-01, -2.4022e-01,
         1.3292e-01, -9.4158e-02,  9.1745e-02,  1.5896e-01, -2.7134e-01,
         9.6485e-02,  2.4319e-01, -6.4003e-02, -5.6888e-02, -3.6282e-01,
         1.8133e-01,  5.1243e-01,  3.0168e-03, -3.1967e-01,  5.3241e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4394,  7.4033,  0.2645,  1.2129, -0.2605,  0.2521,  0.0589, -0.0467,
        -0.2155, -0.5666,  0.1633, -0.1151,  0.0395, -0.2882, -0.0324, -0.1503,
         0.0140, -0.3402,  0.1343,  0.2233,  0.2376,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.0050, -13.6869,  -0.6608,  -0.6472,  -0.1906,  -0.1601,  -0.5249,
          0.1028,   0.0962,   0.2335,  -0.4199,   0.1980,   0.1427,  -0.0275,
          0.2061,   0.5832,   0.4538,   0.6669,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6767, 10.9550,  0.8088, -0.5546,  0.2285,  0.0374,  0.1953, -0.9600,
         0.0706, -0.0809, -0.0230, -0.1238, -0.1505,  0.1379,  0.1461, -0.4509,
        -0.3680,  0.5629, -0.1751,  0.5415, -0.1354, -0.0772, -0.1236, -0.1057,
         0.0715,  0.2862, -0.1082, -0.0631, -0.2682,  0.2466, -0.0770, -0.0636,
        -0.2202, -0.1245,  0.1027, -0.3654, -0.0178,  0.0901,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1138, -7.9011, -0.6762,  0.3429, -0.3482, -0.0840,  0.1721, -0.1486,
        -0.2823, -0.0939,  0.1333,  0.0196, -0.1360,  0.4160, -0.3566, -0.2622,
        -0.2047,  0.6430,  0.5011,  0.6006,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3574, -6.6520,  0.6540,  0.4145,  0.1032, -0.0226,  0.0769, -0.0239,
        -0.1315, -0.2294,  0.3122,  0.3717, -0.5900,  0.1971,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0353e+00,  6.3629e+00,  3.8085e-01, -4.3288e-01, -2.9714e-01,
         6.7467e-02,  6.1490e-01, -5.5398e-01,  2.5685e-01, -1.5949e-01,
         3.6049e-01,  7.9084e-02, -1.2256e-01, -1.0064e-01,  2.2612e-01,
         2.0511e-01,  2.6290e-01, -1.2171e-01, -2.6789e-01, -3.0780e-01,
         1.2743e-01,  2.1833e-01, -9.8264e-02, -1.2060e-01, -1.5091e-01,
        -3.1011e-01, -3.5394e-02,  2.5117e-03,  2.6469e-02, -2.0099e-01,
        -2.1783e-01,  2.6096e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6915e+00, -5.7611e+00,  1.5069e-01, -1.9382e-01, -1.5963e-01,
         2.8572e-02, -1.3006e-01,  2.9736e-01,  8.4206e-02, -1.3258e-01,
         6.4831e-02,  9.5663e-02, -1.3981e-02, -1.2164e-01, -6.9742e-04,
        -6.1530e-02, -1.7466e-03,  3.4791e-01, -3.0463e-01,  5.1556e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6797e-01,  6.5993e+00,  5.5306e-01, -1.2761e-01, -2.5482e-01,
        -4.9273e-02, -2.5093e-01, -1.6833e-01,  8.6647e-02, -3.6937e-02,
        -1.5711e-01, -2.2280e-01, -1.0126e-01, -5.2057e-02, -3.0108e-02,
        -1.0446e-01, -2.8024e-02, -9.3713e-03, -2.0125e-01, -1.0429e-01,
        -1.4935e-01,  1.8787e-02,  1.9675e-02, -1.3651e-01, -8.5031e-02,
        -1.1907e-01, -1.6386e-01, -2.7499e-01, -2.9687e-01, -2.4868e-02,
        -7.5930e-02,  1.0361e-01, -6.4191e-02,  6.3596e-02, -2.7607e-01,
        -9.9681e-02, -4.2068e-02,  1.9774e-02, -9.0475e-02, -4.8381e-02,
         1.1691e-01,  1.2903e-01, -5.2583e-02,  1.2905e-01,  9.8891e-02,
         5.1479e-03,  3.2429e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6027, -6.8454,  0.0825, -0.2565, -0.0316,  0.3013,  0.0129, -0.0311,
         0.6586, -0.2353, -0.1162,  0.5905,  0.4197,  0.5339, -0.3610,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6154e-02,  1.2790e+01,  1.3657e+00, -9.8279e-02,  1.2468e-01,
         1.4194e-01,  1.3548e-01, -2.3613e-01, -1.3837e-01,  8.2553e-03,
         1.3273e-01, -8.8644e-03,  7.6822e-02,  5.1317e-01,  3.3630e-01,
        -1.1508e-01, -7.2427e-02, -3.4163e-01,  4.0013e-01, -1.3185e-01,
        -1.0144e-01, -3.2923e-01, -3.0418e-01,  1.5871e-02, -1.0070e-02,
        -3.9767e-01, -3.8281e-02, -4.8773e-01, -2.3437e-01,  3.2943e-01,
        -1.7368e-02,  6.6769e-02, -7.5937e-02, -1.4202e-01,  4.1492e-02,
         3.5853e-02, -7.8168e-02,  1.9843e-01, -8.1013e-02,  4.4678e-03,
         1.2456e-01,  1.0513e-01, -3.1880e-02,  1.9482e-01, -1.1654e-01,
         7.6613e-02,  6.0034e-01,  7.2788e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ -2.2485, -11.2623,  -0.8996,   0.1541,  -0.5727,  -0.1489,   0.1923,
         -0.1612,   0.5307,  -0.0349,  -0.1481,  -0.4009,  -0.3302,   0.0309,
          0.2116,  -0.0750,  -0.0467,   0.2665,  -0.0700,  -0.0834,  -0.1252,
          0.1590,  -0.0358,   0.3947,  -1.6072,  -0.3225,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3519,  6.6156,  1.4149, -0.1395,  0.1019, -0.4190, -0.3987, -0.0169,
         0.0733,  0.2037,  0.1545,  0.0136,  0.0264,  0.0131, -0.0828,  0.1422,
        -0.0599, -0.1358, -0.3788, -0.1811,  0.6618,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5274e-01, -1.5895e+01, -2.8096e+00, -5.2967e-01,  1.5733e+00,
         3.0610e-01,  2.6476e-01, -4.7110e-01,  2.5675e-01,  4.8993e-02,
         1.7670e-01,  2.1456e-01,  2.9133e-01,  7.0950e-01, -5.1061e-02,
         7.0731e-02, -3.0722e-03,  8.2599e-01,  8.6464e-02,  6.1975e-01,
         4.3050e-01,  2.7307e-01,  1.2658e-01, -5.5313e-02, -7.2635e-02,
         1.9883e-01, -2.7153e-03,  2.4814e-01,  1.9526e-01, -9.4757e-02,
         3.0124e-01, -1.2308e-01, -1.4102e-01,  4.3273e-01, -7.1702e-02,
         1.0383e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5119,  7.0017,  1.2451,  0.0951,  0.4184,  0.3924, -0.5522,  0.5389,
        -0.3135,  0.4879,  0.6938,  0.1036,  0.2830, -0.2107, -0.0564,  0.1038,
         0.0293,  0.6083,  0.1551, -1.0180,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0343, -9.2455, -1.0875,  0.0221, -0.1963,  0.2371, -0.3014,  0.0410,
         0.4124,  0.2524, -0.4693,  0.3160,  0.1834,  0.0313,  0.0763, -0.0159,
         0.0526,  0.2778,  0.0590, -0.3800,  0.1846,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1040, 10.5549,  1.1065, -0.3535, -0.2350, -0.1940, -0.2900, -0.2683,
        -0.4175,  0.5021, -0.6693, -0.3593, -0.1299,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6812, 14.2932,  1.3266, -0.3450,  0.5438, -0.7812, -1.0592, -0.0679,
         0.0706,  0.0649,  0.5631, -0.2625, -0.2921, -0.3442,  0.3483,  0.0992,
        -0.1356, -0.1393,  0.0155,  0.2802,  0.2998, -0.1534, -0.0520, -0.0318,
         0.1279, -0.5070, -0.1551,  0.2287,  0.0301, -0.0752, -0.0615, -0.1840,
         0.0522, -0.1940, -0.0960, -0.3812,  1.0315], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.4730, -10.2580,  -0.5875,  -0.0803,   0.4307,  -0.2909,  -0.2614,
         -0.0435,  -0.1272,   0.0798,  -0.0524,  -0.1486,  -0.6044,   0.3180,
          0.0814,  -0.3286,   0.0135,  -0.0938,  -0.1001,   0.0496,   0.1306,
          0.1092,  -0.2615,  -0.3806,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4729e-01,  4.1012e+00, -5.4158e-02, -3.2618e-03,  1.9341e-01,
        -2.7162e-02, -1.7746e-01, -2.3801e-02, -1.1894e-01,  1.8114e-01,
         7.2374e-02, -1.0263e-01,  2.7896e-01,  1.3307e-01,  1.5962e-01,
         2.2641e-01, -8.9846e-02,  4.6569e-01,  7.2972e-02,  8.6696e-04,
         3.3297e-02,  1.7421e-01,  2.5139e-02,  2.8475e-02, -1.6266e-01,
         2.2970e-02,  7.3149e-03, -9.0045e-02, -3.1135e-02,  7.3554e-02,
         3.6281e-01, -2.7364e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8557e-01,  7.6451e+00,  5.8227e-01,  9.9825e-02, -2.2107e-01,
         4.4151e-02, -2.2725e-01, -2.5541e-01,  3.2607e-01, -1.3088e-01,
         2.7500e-02, -2.4064e-02, -1.1010e-01, -1.6145e-01, -1.0635e-01,
        -4.7201e-02, -1.4845e-01, -2.2517e-02, -1.1033e-01, -2.0993e-01,
         2.9848e-02,  6.1003e-02, -1.7688e-02, -8.7032e-02, -5.7512e-01,
        -2.9365e-01, -3.5216e-02, -3.2810e-02, -1.0783e-03, -2.9367e-02,
         4.1706e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9406, 20.2537, -2.9954, -0.4691, -0.2277, -0.2739,  0.2822, -0.1728,
         0.3479, -0.1443,  0.6772,  0.9128,  0.7584, -0.1239,  0.6365,  0.9366,
         0.1718,  0.6231,  0.7508,  0.0420,  0.8887,  0.2688, -0.4304,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4182e-01,  1.1244e+01, -8.4497e-02,  6.2569e-01,  2.8434e-01,
         1.4067e-01,  1.2342e-01,  3.8679e-01, -3.1609e-01,  6.9067e-02,
         1.1688e-01, -2.5439e-01, -7.8514e-02, -1.8213e-01, -4.2581e-02,
        -8.0964e-02,  1.1531e-01, -7.3336e-03,  6.8245e-02, -5.0752e-01,
        -6.0627e-01, -2.0534e-02, -1.4407e-01,  7.6641e-02, -1.7423e-02,
        -9.5713e-02, -2.9324e-01, -1.8399e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.7407e-01,  9.5926e+00, -4.0273e-01, -1.4476e-01,  1.6162e-01,
         4.6677e-03,  1.9830e-01, -1.0240e-01, -1.6161e-01, -2.6683e-01,
        -2.4316e-01, -3.1229e-01,  1.8428e-01, -1.2244e-01, -1.0553e-02,
        -2.5172e-01, -1.2745e-01,  8.6244e-02, -2.4802e-01,  3.1179e-01,
         2.4400e-01, -3.4673e-03, -2.4342e-01, -6.7737e-02, -1.4371e-01,
        -1.7856e-01, -1.9826e-01,  4.5681e-01, -2.2206e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5159e-02, -1.0779e+01,  9.9834e-02, -3.5179e-01, -2.8775e-01,
        -2.0738e-01,  6.5271e-02, -6.9129e-02,  1.2277e+00,  3.1302e-01,
         6.3286e-01,  5.2857e-01,  2.3169e-01, -6.5990e-03,  2.3156e-01,
         3.8001e-02, -4.9677e-05, -3.1343e-02, -2.3261e-01,  2.3909e-03,
        -5.2624e-02,  2.8074e-01,  1.1485e+00,  5.7341e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0722,  9.2787,  0.4469, -0.4508,  0.4137, -0.1755,  0.1287, -0.0544,
        -0.3635,  0.0526, -0.3879,  0.0478,  0.0827, -0.0259, -0.4135, -0.0331,
        -0.1463, -0.0987,  0.2760, -0.0638, -0.0500, -0.2804, -0.0216,  0.1065,
         0.0145, -0.0420, -0.2165, -0.1504, -0.1960, -0.1118,  0.1268,  0.2224,
        -0.7276, -0.1399], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6064e-01, -6.4881e+00,  1.2287e+00,  3.0400e-01, -1.1941e-01,
         1.3408e-01, -7.5919e-01, -5.4216e-02,  1.1552e-01,  1.0645e-01,
        -6.7698e-02,  3.9148e-03, -5.9299e-02,  5.8086e-03, -8.4371e-02,
        -1.1415e-01, -2.3891e-02, -6.9988e-02,  7.8811e-02, -2.1872e-01,
         1.0631e-01,  5.9304e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8164e-02, -4.1267e+00, -5.1259e-01, -2.0107e-02,  5.8812e-01,
         4.9892e-01, -4.5180e-01,  2.5585e-01, -3.6157e-03,  1.5023e-01,
        -8.5782e-01, -1.0667e-01, -8.0690e-02, -1.0275e-01, -1.4393e-01,
        -6.0095e-02, -1.4991e-01, -1.6089e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8385,  9.9441,  0.6258,  0.3817, -0.4902,  0.1967, -0.3489,  0.5783,
        -0.2007, -0.3977,  0.1917,  0.0655, -0.2539, -0.0260, -0.1459, -0.2173,
         0.2106, -0.0856, -0.0343, -0.1027,  0.1452, -1.2595,  0.2006,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0083e-02, -1.3785e+01,  2.8945e+00, -2.5938e+00, -4.5907e-01,
        -5.7481e-01, -2.4502e-01,  4.1780e-01,  6.9810e-01,  2.6768e-01,
        -2.2832e-01, -4.2178e-02, -1.2442e-04, -3.7160e-01, -1.6168e-01,
        -8.1473e-02,  2.1244e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  3.4711, -15.7835,  -3.1707,   0.4876,  -0.1571,  -0.0336,   0.3229,
          0.8126,   0.0603,   0.1479,  -0.3296,   0.2112,  -0.1655,   0.3671,
          0.0241,   0.0182,   0.0845,   0.5441,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3517e-01,  8.7479e+00,  9.9246e-01, -3.8546e-02, -1.6136e-01,
         2.9681e-02, -1.2917e-01, -1.7914e-01, -2.5275e-03, -4.9474e-01,
         3.2180e-01, -5.8526e-02, -4.2514e-02, -7.6608e-03,  1.4163e-01,
         4.7616e-02, -2.0989e-01,  3.1443e-01, -4.4059e-01, -4.7683e-02,
        -1.2948e-01,  2.4061e-01,  2.2358e-01, -6.6257e-02, -3.6718e-01,
        -3.9888e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4797,  8.8258,  1.9544, -0.6289, -0.6958, -0.2127, -2.2391,  0.2653,
         0.3118, -0.5309, -0.2344, -0.2085, -0.1588, -0.8447, -0.2837,  0.0781,
        -0.2003,  0.1284, -0.1889, -0.1192, -0.2674, -0.0752, -0.2402, -0.0654,
        -0.7738, -0.0207,  0.0204, -0.0462,  0.5500,  1.4783, -0.2522,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1726,  9.8945,  0.7075,  0.8671,  0.6985,  0.1605,  0.0249,  0.4105,
        -0.1636,  1.5271,  0.1937, -0.8447,  0.0569, -0.0124,  0.2720, -0.1443,
         0.5079, -1.2783,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6879, 12.1496,  0.6238,  0.6645,  0.4778, -0.2647,  1.4484,  1.1132,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.9387e+00, -1.6002e+01, -5.3312e-01,  7.6183e-01,  3.7698e-02,
        -4.5510e-01, -7.2507e-01, -1.6468e-01,  5.8311e-01,  5.4726e-02,
         3.3571e-02,  5.7764e-01, -2.2420e-01, -1.6267e-01, -1.6082e-01,
        -9.2196e-02,  8.6091e-02,  2.6568e-01,  1.5412e-01,  8.5601e-02,
        -1.7851e-01, -1.0254e-01, -2.1850e-01, -9.9543e-02,  1.1209e-01,
        -1.5175e-01, -8.5393e-03, -1.7319e-01,  3.1305e-01, -3.4430e-02,
        -3.3824e-01,  8.5508e-02, -2.4303e-02, -1.2519e-01,  1.1063e-01,
        -1.1238e-01,  1.9494e-01, -2.7466e-01,  1.6652e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0144,  3.2411, -0.2224,  0.2141,  0.1596,  0.1027,  0.0450,  0.0373,
         0.1310,  0.0922,  0.1754,  0.1233,  0.1257,  0.1313,  0.0037, -0.0268,
        -0.1163,  0.1342,  0.0850,  0.1645,  0.3732, -0.2035, -0.0501,  0.0514,
        -0.0253,  0.0126,  0.1547, -0.1456,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2442, -7.4588, -0.2997, -0.0859, -0.1125,  0.1788,  0.1504,  0.0736,
         0.1304,  0.3327, -0.0272,  0.0745,  0.1737,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0116e-01, -1.0792e+01,  2.8169e-01,  8.1488e-01,  1.6143e-01,
        -2.8211e-01,  7.6241e-01,  1.4801e-01,  1.8744e-01, -5.0512e-02,
         4.3504e-01,  4.9787e-01,  2.5373e-02, -7.1395e-03, -4.0028e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0087,  4.9831, -0.3633, -0.9232, -0.0454, -0.3094, -0.2254,  0.0583,
         0.3224,  0.0771, -0.0056,  0.1486,  0.4355,  0.2853,  0.0421,  0.0198,
        -0.1568,  0.0650, -0.0992,  0.0091,  0.0520, -0.1103, -0.0555,  0.1085,
        -0.0361,  0.1315, -0.0825,  0.0825,  0.0312,  0.0398, -0.1029, -0.0449,
        -0.1204,  0.0828,  0.1147,  0.1440,  0.1429,  0.0064, -0.0636, -0.0062,
        -0.0216,  0.0091, -0.0279, -0.0727, -0.0790, -0.2643, -0.6656],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1695e-01,  7.3930e+00,  6.9679e-01,  1.3843e-01, -1.3377e-01,
        -6.6149e-01, -2.0471e-01,  2.4608e-02, -9.3843e-02, -1.0495e-01,
        -1.1802e-01,  1.4320e-01, -1.3051e-01, -9.2783e-02, -3.9925e-02,
        -1.8522e-01, -8.7277e-02, -2.7153e-01, -8.8451e-02,  5.8452e-02,
         2.1240e-02, -5.9963e-02,  8.8017e-02,  5.2296e-02, -1.1029e-01,
        -7.8536e-02, -8.4126e-02,  6.4610e-03, -1.2280e-02, -2.1113e-01,
         1.4030e-02, -3.9329e-02, -2.7690e-02, -9.3441e-02,  6.7751e-03,
        -4.2374e-02, -1.1240e-01, -3.2465e-02, -6.5057e-02, -6.3071e-02,
        -2.8168e-02, -2.9592e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5054e-02,  1.3439e+01,  1.1936e+00, -1.0436e-01,  1.7082e-01,
         2.1965e-01,  7.1389e-01,  7.2085e-01, -2.0811e-01,  3.6741e-01,
         1.3199e-01, -7.3298e-01,  9.1729e-02, -4.4282e-03,  6.5576e-02,
         5.3230e-02,  1.8568e-01,  1.1660e-01, -1.2881e-01,  2.5818e-01,
        -2.7050e-01,  1.0136e-01, -2.8931e-01,  5.1647e-01,  2.9078e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1786e-01, -6.0086e+00, -5.9466e-01, -3.4196e-01, -1.7470e-01,
         4.4646e-02, -3.1871e-01, -8.4518e-02, -9.7509e-02,  1.1975e-01,
        -5.5519e-02,  2.2443e-01,  2.0209e-03, -4.2884e-02,  9.8594e-02,
         1.5320e-01,  1.9665e-01, -9.7872e-02,  2.6639e-01, -4.0845e-02,
        -5.5963e-02,  1.6677e-02,  6.0220e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9959e-01, -1.5246e+01,  1.8186e-01,  4.5289e-01,  7.5143e-01,
         4.7694e-01,  3.2047e-01,  5.4226e-01,  5.7431e-01, -1.3491e-01,
        -3.6390e-01, -3.6882e-03,  3.4503e-02, -2.6612e-01,  3.3657e-01,
        -1.4614e-01,  9.9578e-01,  3.0440e-02,  4.5165e-01, -4.4258e-02,
         2.2271e-01, -2.1171e-01, -7.1011e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8488e+00, -1.0240e+01, -3.7555e-01,  3.8174e-01,  1.1938e-01,
         1.2087e-01, -2.7899e-01,  2.8700e-03,  3.0450e-01, -6.2639e-01,
        -3.4657e-01, -5.1426e-01, -2.4106e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8572e+00,  1.6529e+01, -3.5280e-01,  1.0336e+00, -8.2953e-01,
        -1.1923e+00, -2.0179e-01,  1.8997e-01, -3.7829e-01, -8.3085e-01,
         1.6467e-02,  1.6859e+00, -6.3014e-01,  1.6503e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3375e-01,  8.7367e+00,  4.5152e-01,  3.6897e-01, -2.5388e-02,
        -1.0269e-01,  1.1412e-01,  1.2900e-01,  2.4255e-03, -2.7208e-01,
        -4.0628e-01, -5.6953e-02,  2.4880e-01,  1.2225e-03,  5.0624e-02,
         1.9739e-02, -2.3863e-01,  1.8383e-01,  4.7895e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.0702e+00, -1.3894e+01,  6.2411e-02,  3.0330e-01,  1.6240e-01,
         2.0380e-01,  3.2525e-02, -4.7487e-02,  1.4419e-01,  1.6746e-01,
         2.0098e-02,  5.6460e-03,  6.7327e-03,  3.2594e-01, -6.3889e-02,
         3.8897e-02, -1.3736e-01, -4.1361e-02,  3.6740e-02,  8.0440e-02,
         2.0524e-01, -1.8438e-02,  4.6943e-02,  1.3706e-01,  1.9168e-01,
         2.3548e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.7166, -12.4165,  -0.9621,   0.1372,   0.2469,   0.3969,   0.5678,
         -0.2334,   0.3194,   0.4129,   0.1708,   0.2205,   0.0565,  -0.1520,
          0.4469,   0.0513,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.8754, -12.6883,   1.2545,  -0.9487,   0.3242,  -0.7163,  -0.9657,
          0.5771,   0.5111,   0.2078,   0.6095,   0.8275,   0.3707,  -0.0652,
          0.4067,   1.1888,  -0.1649,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2352,  5.9805,  1.6970,  0.7761,  0.5026,  0.4809, -0.3578,  0.7793,
         0.5564,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2884e-01,  3.5366e+00,  5.8343e-01,  2.5655e-02, -1.3013e-01,
        -1.5443e-01, -9.5202e-02,  4.0332e-02, -1.3944e-01, -9.9210e-02,
        -2.6595e-04,  9.0974e-02, -9.6134e-03,  7.1750e-02, -2.0119e-02,
        -1.6833e-01, -4.5684e-02, -3.4188e-02,  8.5081e-03,  8.7143e-02,
        -2.3089e-02,  5.4935e-02, -1.0058e-02,  1.5326e-02,  7.3379e-03,
        -1.2275e-01,  1.0282e-01,  3.7750e-02,  2.6205e-02,  1.4742e-02,
        -4.3412e-02,  6.0573e-03,  8.6977e-03,  1.0954e-01, -7.5560e-03,
        -2.9675e-01, -1.4597e-02,  1.8976e-01, -4.8696e-02,  3.0816e-02,
        -1.3963e-01,  1.1841e-01, -9.8163e-02, -1.8634e-01,  5.4009e-02,
         4.5644e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7883, 13.6856, -0.0769, -0.5426, -0.2198,  0.6193,  0.7247, -0.2862,
         0.1408,  0.1379, -0.1870,  0.0475, -0.1206,  0.1381,  0.0556,  0.4703,
         0.3426,  0.8261, -0.1333,  0.2756, -0.5326,  0.1703, -0.3330, -0.1724,
         0.1942, -0.0545,  0.5902, -0.0431, -0.3284, -0.3716,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1933, -6.5814, -0.0133,  0.1217,  0.6228,  0.7831, -0.3168,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.3790, -12.0896,   0.3613,  -0.3166,  -0.2698,  -0.3914,   0.2780,
         -0.2732,   0.1372,   0.1528,   0.2895,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3037, -7.1412, -0.7900, -0.6781, -0.1340, -0.2360, -0.3024, -0.0156,
        -0.5127, -0.0560, -0.0298, -0.0509,  0.1528, -0.3498,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6123e-01,  7.0167e+00, -2.1612e-01,  4.2145e-02, -1.2162e-01,
         7.0821e-02, -8.6697e-02,  1.0534e-01,  6.1626e-02, -3.8037e-01,
         1.5264e-01,  7.0637e-02, -5.9538e-03, -1.0514e-01,  1.2135e-01,
        -2.1136e-03, -3.1415e-03,  1.3375e-02, -1.0810e-01,  2.0421e-02,
        -8.2419e-02, -6.9342e-02, -1.6587e-01,  3.8488e-02,  1.9345e-02,
         2.1592e-01, -1.4317e-01, -7.3108e-02, -1.5833e-01, -3.4341e-02,
        -5.5996e-02,  4.3221e-02,  4.5029e-01,  3.3077e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9229e-01,  5.7390e+00,  1.4101e-02,  3.2345e-01, -1.0587e-01,
        -9.5475e-02,  1.9775e-01, -6.4275e-04,  9.4612e-02,  2.0863e-01,
        -3.0303e-01, -7.6183e-02, -1.0775e-01,  4.3083e-02, -1.7117e-01,
        -1.5089e-01, -2.4148e-01, -3.0213e-01, -1.2045e-01,  3.8565e-02,
        -4.4854e-01, -5.5130e-02, -3.2880e-01, -2.6488e-02,  3.1780e-01,
        -2.8135e-01,  8.8353e-02,  3.8691e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.2046, -14.7870,  -1.4607,  -0.4852,  -0.7598,  -0.0780,   0.1029,
          0.1777,  -0.3178,  -0.2752,   0.2079,   0.4235,  -1.5600,   0.4800,
          1.6771,   0.1860,  -0.1352,   0.8714,   0.0368,   0.2315,   0.1642,
          0.3937,  -1.2034,   0.3447,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.3169, -9.2213, -0.7207, -0.8044,  0.0657, -0.1212, -0.0531, -0.0330,
         0.0095, -0.0659,  0.6863, -0.3702,  0.3974, -0.1147, -0.1325,  0.0252,
         0.3122,  0.0372,  0.0218, -0.3116, -0.0960,  0.2536,  0.1306, -0.0840,
        -0.1220, -0.2070, -0.1866, -0.2552,  0.1180,  0.6539,  0.3382,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.0737, -10.4130,   2.5184,  -0.2221,   1.7408,  -0.3238,   0.0931,
         -0.5698,  -0.2507,   0.4151,  -0.3485,  -0.3341,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.2688, -16.5042,  -0.3687,  -0.5634,  -0.5693,  -0.0975,   0.1260,
          0.0873,  -0.7736,   0.5522,  -0.6228,  -0.1063,  -0.3790,  -0.6244,
          0.1382,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6491e-01,  1.2595e+01, -5.7730e-01, -3.1186e-01, -4.0973e-01,
        -3.2788e-01, -5.3745e-01,  7.9611e-02,  3.7579e-03, -2.6444e-01,
         2.5191e-01,  1.0809e-01, -1.0882e-01, -1.8684e-01, -1.7391e-01,
         3.2674e-01, -4.6994e-02, -1.6017e-01, -2.3512e-01, -3.7094e-01,
        -4.6323e-02, -4.0521e-03, -1.3393e-01, -1.3196e-01,  1.3419e-01,
        -3.1331e-01, -7.1618e-02, -8.8667e-02, -1.2168e-01,  2.4852e-02,
        -1.9598e-01, -4.4041e-03,  8.1618e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3126e-01, -9.5840e+00,  3.4391e-01,  8.9873e-01, -3.4477e-01,
        -8.4756e-02,  2.5359e-02, -8.7641e-02,  1.7350e-01,  1.4967e-03,
        -2.6714e-01,  1.0689e-01,  1.1365e-01,  5.6474e-01,  4.9075e-01,
         4.0839e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.4914, -16.3245,  -0.8277,   2.4772,   0.2162,   0.4808,  -1.1710,
          0.9297,  -0.3187,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5990,  9.7650,  0.0390,  1.4846,  0.7920,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.4254, -14.1770,   1.2339,  -0.4975,  -0.0579,   0.3192,   0.3398,
          0.0339,   0.6106,  -0.2743,  -0.6530,   0.0794,  -1.2420,   0.6921,
         -0.1041,  -0.4558,  -0.2695,  -0.5074,  -0.2344,  -0.0406,  -0.4414,
         -0.2458,  -0.3052,  -0.1486,  -0.5682,   0.5245,   0.3497,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2837,  8.3991,  0.7739,  0.3803, -0.7496,  0.2837, -1.1246,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2249e-01, -3.3830e+01, -1.2770e+00, -1.7407e-01,  5.1742e-02,
         1.7947e-01,  5.6854e-01, -2.1607e-02,  5.3484e-02,  1.1385e+00,
         5.5035e-01,  5.9580e-01, -3.2142e-01,  7.3219e-01, -7.3420e-02,
         3.9751e-01, -6.3770e-01,  5.9537e-02,  6.0684e-02,  1.2460e-01,
         4.6712e+00, -1.8167e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6211e-01, -9.6064e+00, -4.2763e-01,  3.5523e-02, -2.8609e-01,
        -8.0065e-02, -4.1613e-03,  2.6448e-01,  4.1690e-01, -1.7836e-01,
        -2.5999e-01,  4.5189e-02,  1.7391e-01, -1.7705e-01, -6.0575e-03,
         6.6117e-02,  3.0589e-01, -9.6778e-02,  6.3870e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.3172, -12.5479,  -0.4339,   0.2565,   0.2665,   0.1716,  -0.2627,
         -0.3861,   0.2838,  -0.0339,   0.0815,  -0.0882,   0.0608,   0.4152,
         -0.0837,   0.0746,  -0.0246,  -0.0275,   0.0142,  -0.2744,  -0.0298,
          0.1577,   0.2932,   0.0478,   0.1248,  -0.0634,   0.0265,  -0.2764,
          0.3069,   0.6271,   0.1645,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 5.2718e-01,  1.9208e+01,  3.4149e+00, -1.2165e+00, -8.5291e-02,
         1.9411e-01,  5.0896e-02, -2.3788e-01, -6.4912e-02,  3.0111e-02,
         1.0169e-01, -1.9042e-01, -7.8846e-01, -4.1887e-01, -7.9036e-02,
         4.7758e-01, -8.5131e-01,  6.2478e-01, -1.3903e+00,  3.2106e-01,
        -2.4984e-01, -6.7481e-01, -1.5460e-01, -2.7819e-01,  1.1058e-01,
        -1.4295e-01,  1.5367e-01, -4.1071e-01, -1.0420e+00, -5.5497e-01,
        -3.9593e-01,  9.6593e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9212e+00,  9.3948e+00, -4.8675e-01,  3.6423e-02,  9.5847e-02,
        -3.3526e-01,  2.4586e-01,  8.7068e-02,  9.8127e-01,  3.2593e-02,
         4.0425e-02,  3.4419e-01, -1.4754e-01, -8.7187e-02,  5.2628e-01,
        -1.9191e-01, -3.7918e-02,  1.4904e-01, -5.0280e-02, -3.9249e-03,
         3.5661e-02, -1.0310e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1491, -9.4842,  0.4732, -0.4064,  0.4445, -0.4596,  0.8562, -0.7167,
        -0.5004, -0.2608,  0.0345,  0.2078, -0.3110, -0.0100, -0.1614, -0.2388,
         0.2135, -0.0858, -0.5206,  0.0725, -0.2516,  0.1652, -0.1803, -0.0199,
         0.1701, -0.1312,  0.0597, -0.3551,  0.3181,  0.4668,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2737,  6.8207,  0.1625, -0.2435,  0.2716, -0.3078, -0.5052,  0.0305,
         0.0608, -0.0094, -0.0917, -0.2991,  0.0400, -0.1269, -0.0721,  0.1650,
         0.5340,  0.3230,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1676e-01, -1.7257e+01, -1.0777e+00, -8.4448e-01, -1.1391e-02,
         1.1495e+00, -4.4792e-02,  1.0847e-01,  1.6287e-01, -9.9699e-02,
         2.0190e-01,  8.4431e-02, -2.2714e-01, -1.0017e-01, -5.2727e-01,
        -5.1506e-01, -6.3590e-01,  6.8943e-03,  3.1320e-01, -2.5993e-01,
        -7.3391e-04,  3.5660e-01,  4.0532e-01, -2.4707e-02, -1.5445e-01,
        -6.0082e-02,  1.8402e-01, -1.0610e-01, -3.4519e-01, -1.2082e-01,
        -2.9931e-01,  3.3423e-02,  6.0375e-02,  6.7002e-02,  6.8000e-02,
         3.0179e-01,  7.2897e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5597e-01, -8.3487e+00,  1.5427e+00,  2.1396e-01,  1.8675e-01,
        -2.9270e-03,  4.0875e-02, -3.6727e-01,  1.0704e-01, -7.9919e-02,
        -3.6199e-02,  5.7680e-02,  8.7092e-02, -2.0130e-01,  2.5852e-02,
        -1.3193e-01,  6.7238e-02, -2.1296e-01,  4.3669e-02, -1.3924e-01,
         1.8078e-01, -6.5839e-02, -1.7454e-01, -9.5378e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0821e-01, -9.0734e+00, -7.3729e-01, -7.6259e-01, -6.5304e-01,
         2.6931e-01, -4.0461e-01,  1.0377e-01, -1.4751e+00,  2.2234e-01,
        -1.2206e-01,  2.0542e-01,  2.8617e-01, -1.0311e-01, -8.8466e-02,
        -2.6661e-02,  1.5698e-01,  6.0229e-02,  3.2794e-02,  7.5945e-02,
        -2.4360e-02,  1.6113e-01,  2.2267e-01,  8.2133e-02, -2.8634e-01,
        -1.7307e-01, -1.6294e-01, -1.5311e-01,  9.9278e-02,  3.6288e-02,
        -6.5878e-01, -8.6462e-02, -6.1706e-02, -6.1408e-02, -2.0071e-01,
         2.8170e-01,  2.4039e-01,  5.9516e-02, -5.6496e-02,  8.5120e-02,
         1.0937e+00, -1.9670e-04,  4.7478e-03, -2.2408e-01,  2.8387e-01,
         3.2868e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3534e+00, -1.1893e+01,  2.2829e-03, -1.1401e-01,  1.3131e+00,
        -1.3498e-01,  1.9520e-01, -1.5234e-01, -2.8886e-02, -1.5773e-01,
        -1.7668e-02,  3.8791e-01, -1.3963e-01,  8.0411e-03, -1.1363e-02,
         5.2881e-02,  6.2069e-03, -8.7690e-02,  2.3106e-01, -1.4077e-02,
        -9.2899e-02,  1.0518e+00,  5.2041e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6943e-01,  2.5163e+01, -1.3418e-03, -2.2119e-02,  1.6566e+00,
         1.1179e-01, -6.1392e-01, -2.7693e-01, -1.4574e+00, -8.4769e-02,
         2.4051e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3229e-01,  1.4401e+01,  1.0884e+00, -4.9866e-01, -8.3965e-01,
         5.1843e-01,  2.7930e-01,  2.4988e-03,  4.2863e-01, -2.6094e-01,
        -6.1452e-01, -2.7301e-01,  3.1654e-01, -4.1131e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.0394, -10.3426,  -0.5002,   2.1355,   0.8039,   0.4452,   0.1717,
         -0.1721,   0.0218,   0.8899,  -0.0471,  -0.2042,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1206e-01,  3.8953e+00,  2.8823e-01, -2.2059e-02, -2.6900e-02,
        -3.7572e-02,  4.5785e-03,  8.8656e-02,  4.8289e-02, -5.9650e-02,
        -2.2132e-02, -1.1789e-01,  6.7487e-02, -1.0227e-01,  1.6814e-02,
         3.9521e-02, -2.6173e-02,  4.5331e-02, -2.9320e-02, -1.7615e-01,
        -1.4850e-02,  1.8545e-02,  3.8060e-02,  6.1958e-02,  7.5084e-03,
         1.7354e-01, -2.8064e-03, -3.5916e-02, -4.1659e-02, -2.5174e-02,
        -2.7495e-02,  1.1927e-02,  8.1657e-03,  1.8565e-02,  3.7229e-03,
         2.8944e-02,  7.7095e-03,  1.0454e-02,  1.1680e-02,  8.6123e-03,
         2.5973e-02, -1.0929e-02,  2.3209e-02,  3.7753e-02,  3.7141e-05,
         2.4602e-02,  1.8055e-02,  1.9795e-02,  9.7690e-03, -3.3931e-02,
         4.7006e-02, -1.9754e-02, -1.8736e-02,  1.5171e-02,  6.5286e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-5.1479e-01, -7.7306e+00, -3.9187e-01,  1.0143e-01, -2.2139e-01,
        -7.4454e-02,  1.5762e-01,  9.3578e-02,  9.3725e-02,  5.2360e-02,
         2.7958e-01,  1.9677e-02, -2.8117e-01, -9.6246e-02, -3.6674e-02,
        -2.2182e-02,  1.4134e-01, -3.6572e-01,  2.9145e-02,  3.4857e-02,
        -1.1047e-03,  7.9535e-01,  6.8968e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.1699, -10.4692,  -1.4044,  -0.4323,   0.0427,  -0.2957,  -0.1335,
         -0.2488,  -0.3591,  -0.1171,   0.6714,  -0.0414,   0.2356,   1.0184,
          0.5247,  -0.0303,  -0.4976,   0.2898,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7329e+00,  6.7104e+00,  1.4353e+00, -7.4171e-01, -2.1444e-03,
         6.3650e-01,  2.2745e-01, -4.3501e-01, -6.0968e-01, -7.1890e-01,
         8.9031e-04, -4.2385e-01,  5.6700e-01, -5.2675e-01, -2.9733e-01,
         1.1703e+00,  7.0088e-01, -6.3371e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4841e+00, -7.1377e+00,  7.6604e-01,  1.0061e+00,  2.6641e-01,
         4.4048e-01, -1.4171e-01,  1.6360e+00,  2.1387e-01,  2.5481e-05,
        -5.3794e-02,  1.3732e-01,  4.0852e-02,  1.1962e+00,  2.8478e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5470, -11.4238,   0.2131,  -0.2537,   0.0678,  -0.3173,   0.3053,
          0.0587,  -0.6990,  -0.1919,   0.2631,  -0.0699,   0.1479,  -0.1306,
         -0.1082,  -0.0442,  -0.1392,  -0.1765,   0.1575,   0.4105,  -0.0488,
          0.3695,   0.0394,   0.0663,   0.4625,   0.1450,   0.4222,   0.0381,
          1.0957,  -0.3288,  -0.2067,   0.0377,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4174e+00, -6.5774e+00,  2.9675e-01, -4.2978e-02, -1.8520e-01,
         1.4715e-01,  9.1338e-02,  2.0075e-01,  6.4054e-02, -2.5530e-02,
        -2.1769e-02, -1.9109e-01, -9.6021e-02, -6.8789e-02, -6.6760e-02,
         8.2389e-02,  8.9104e-04,  4.0524e-01, -2.6554e-01,  6.8419e-01,
        -3.8502e-01,  3.2277e-01,  3.1136e-01,  7.2918e-02, -1.2659e-02,
        -1.1217e-01,  1.6858e-01, -2.6204e-01,  3.3158e-02,  1.1380e-01,
        -2.1494e-01, -9.0471e-02, -3.1399e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0560e-01, -8.7707e+00,  3.6056e-01, -2.9106e-01, -3.2242e-01,
        -6.9906e-01, -2.5578e-01, -3.0446e-01, -1.1335e-01,  7.4408e-02,
         2.7321e-02,  6.0059e-01, -1.2190e-01, -7.3030e-02,  2.0713e-01,
         2.8484e-01,  1.0103e-01,  8.0015e-02,  1.9872e-01,  1.2757e-01,
         1.3587e-01, -2.8665e-02,  9.0531e-02,  2.0731e-01,  3.6598e-02,
        -2.5712e-02, -6.5884e-02, -2.2366e-01, -1.0898e-01, -3.8332e-02,
        -8.4050e-02, -1.7725e-02,  4.0897e-03, -8.1876e-02, -7.2245e-02,
        -5.4230e-03,  4.9324e-02,  7.7521e-03,  1.4333e-02, -1.7792e-01,
        -7.7599e-03,  8.3223e-03,  4.8075e-02,  8.4739e-02,  2.5477e-02,
        -9.4999e-02,  1.3680e-02,  3.2871e-02, -7.3154e-02, -2.4350e-02,
         4.5149e-01, -2.4368e-02, -5.9213e-02, -1.3229e-01, -1.4842e-01,
        -1.2710e-01, -7.2379e-02, -1.1599e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1455e-01, -1.1081e+01, -1.8262e+00, -8.5443e-01, -1.4794e-01,
        -1.0010e-01, -6.2151e-02,  3.3662e-02,  1.6880e-01, -2.2916e-02,
        -2.4537e-01,  1.3242e-01,  7.0147e-01,  4.5327e-01,  2.5207e-01,
         1.1825e-02, -1.5907e-01,  6.2843e-02,  8.4172e-04, -2.0391e-02,
         5.7578e-02, -1.8856e-01,  1.6101e-01, -8.8713e-02,  6.0230e-01,
         4.4186e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4900e+00, -2.2802e+01, -9.7026e-02,  5.5757e-01,  7.6270e-01,
         2.1850e-03,  6.8371e-01, -8.6441e-02,  1.8512e-01,  5.0092e-01,
        -3.6922e-01,  2.3553e-02,  5.1158e-01,  1.5814e-01,  1.5115e-01,
         1.8704e-02, -8.5960e-02, -9.5841e-02, -3.2004e-01,  3.4262e-01,
         1.2495e-01,  5.7039e-01,  8.2503e-01,  9.3063e-01,  3.5745e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2405e-01, -1.1221e+01,  1.8773e-01,  3.9823e-01, -7.5933e-01,
         6.9671e-01, -1.8100e-01, -1.7158e-01,  3.5605e-01, -3.6328e-01,
         3.3432e-01,  4.9938e-02,  1.3070e-02,  5.2170e-02, -1.5479e-01,
        -4.5212e-01,  2.9315e-02, -1.4948e-01,  3.7840e-02,  3.5415e-02,
         1.8526e-01,  2.4091e-01,  4.9407e-02,  2.9517e-01, -9.5906e-02,
         1.7225e-01,  3.6062e-01,  1.5956e-01,  6.0123e-03, -1.9726e-01,
         2.4212e-01, -1.4619e-02, -2.3456e-01,  1.7266e-02, -3.1263e-01,
        -4.8618e-02,  3.8451e-02, -6.4081e-01, -1.1619e-01,  3.7963e-02,
        -4.9750e-03,  4.9226e-02, -2.1345e-01, -2.5125e-01, -1.2122e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8113e-02, -1.1432e+01, -1.4336e+00, -4.6817e-01,  3.7621e-01,
         2.2044e-02, -6.1529e-02,  1.5662e-02,  1.8611e-01, -4.8305e-02,
         1.8322e-01,  4.0938e-02,  1.5380e-01,  5.0869e-02, -1.9376e-02,
        -2.0511e-01, -3.4560e-01,  6.3866e-02,  8.9005e-03, -2.1465e-01,
        -1.0930e-01,  5.4400e-01,  2.1476e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2314, 14.5602,  1.6126,  0.1428,  0.3909, -0.3094,  0.0767,  0.1655,
         0.1615,  0.2626, -0.1597, -0.7586,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.3862, -7.5007, -1.1362,  0.3378, -0.0772, -0.0517, -0.1636, -0.1302,
        -0.1936,  0.1928, -0.0407, -0.0849, -0.0391, -0.1327,  0.0650,  0.0761,
         0.0839, -0.0104,  0.0604,  0.3550,  0.1224,  0.2456,  0.0510,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2669e+00, -1.1288e+01, -5.0689e-01,  3.9317e-01,  1.4091e-01,
         1.0893e-01,  1.7616e-01,  1.7585e-02, -5.1740e-03, -1.9763e-01,
        -1.3236e-01, -1.4156e-01, -1.1365e-01, -2.8170e-01,  5.8394e-01,
         1.2371e-01,  1.4604e-01, -6.3035e-02,  1.1955e-01,  4.9507e-01,
         2.5389e-01, -1.0358e-01,  4.6402e-02,  7.5502e-02,  1.4300e-01,
        -7.9574e-02, -2.3247e-01, -2.9272e-02, -1.4146e-01, -2.2057e-02,
         1.3106e-02, -3.1371e-01,  5.3226e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.2675, -14.0894,   0.4686,   0.8710,   0.5744,  -0.0265,   0.1091,
          0.1628,   0.1081,   0.0727,  -0.1535,  -0.4891,   0.0918,  -0.3469,
         -0.5095,   0.3467,   0.2629,  -0.0880,   0.3067,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.1227, -12.0504,  -0.2163,  -1.0038,  -0.2889,  -0.2406,   0.2357,
          0.1212,  -0.3440,   0.2555,  -0.1912,  -0.1880,  -0.0437,  -0.4783,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6177e+00,  1.6873e+01,  4.6124e-01, -1.2751e-01, -1.1977e-01,
         2.7308e-01, -2.4321e-01,  3.0288e-01,  1.2971e-02,  1.5007e-02,
        -1.0653e-01,  1.8158e-02, -1.6690e-01, -4.1078e-01, -1.3536e-01,
        -1.6308e-01,  1.9573e-01,  1.1630e-02, -3.8796e-01, -1.5054e-01,
         1.2530e-01,  7.2902e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1668e-01,  1.1033e+01,  5.6933e-01,  8.1916e-01,  4.3232e-01,
        -9.8329e-02, -6.6408e-02, -5.7095e-01, -3.3729e-01, -1.0966e-01,
        -5.4640e-01, -7.9176e-02, -3.5530e-01,  2.1980e-01,  3.2684e-02,
         1.0019e-02, -9.1999e-02, -4.3873e-02, -6.8104e-02,  8.4146e-02,
        -1.6998e-01,  9.1533e-02, -1.7109e-02, -2.1398e-01, -1.6451e-01,
        -2.5622e-02,  1.1500e-02,  7.4744e-02, -2.0759e-01, -9.0582e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2992,  5.7814,  0.3423, -0.8138,  0.1226,  1.0991,  0.3205, -1.0413,
         0.1305, -0.2402, -0.6837,  1.0859,  0.4716,  0.0141, -0.0793, -0.2457,
         0.0163,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5034, 15.3735,  1.1913, -1.1130, -0.2249, -0.1843, -0.1463,  0.3858,
        -1.6902, -0.4041, -0.3761,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5303e+00,  1.9099e+01,  1.5108e+00,  2.7669e-01, -7.4444e-01,
        -3.9331e+00, -6.9151e-01, -6.9418e-01, -1.8828e-01,  4.1056e-01,
         7.2662e-01,  2.3504e-01, -6.3942e-02,  1.9464e+00,  7.2794e-03,
        -1.6805e+00,  5.9752e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3024,  4.7659,  0.4043,  0.0255,  0.0841,  0.0438,  0.1900,  0.0164,
        -0.1878, -0.0907, -0.1529, -0.2362, -0.1130,  0.0198, -0.5159, -0.0526,
        -0.0096, -0.0240, -0.1520, -0.0326, -0.0178, -0.1637, -0.0857, -0.0485,
        -0.0265, -0.2303,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0641e-01,  1.1384e+01, -8.5344e-01,  2.0446e-01,  1.4943e-01,
        -5.2967e-01, -3.2795e-01,  2.9066e-01,  7.2620e-01, -4.8496e-02,
         2.7575e-01,  7.6445e-02,  2.9070e-01,  4.7480e-01,  1.5683e-01,
        -1.8766e-01, -2.7448e-01,  5.2866e-01, -1.8900e-01, -1.2786e-01,
        -2.5018e-01, -8.7088e-02,  2.1599e-03,  2.5152e-01, -4.5779e-02,
        -1.4877e-01,  1.7944e-01,  1.3234e-01, -4.6526e-01,  2.8275e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0939e-02,  1.0591e+01,  1.8061e-01, -3.5813e-01,  2.3829e-01,
        -1.3685e-01,  3.1060e-01, -4.2042e-03,  1.8245e-01, -4.7656e-01,
        -6.6830e-02,  5.2949e-01,  1.7678e-01,  5.5424e-02, -1.1574e-01,
         1.0052e-01,  3.0297e-02, -6.0243e-02,  8.4521e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.6790e-01,  6.8390e+00,  6.1972e-01, -2.4392e-01, -8.0735e-02,
        -1.2388e-02, -1.3438e-01, -1.2659e-01, -1.1686e-02, -3.9536e-02,
         4.3666e-02, -1.9987e-03,  4.8703e-02,  4.8959e-02,  1.2444e-01,
        -1.4003e-01,  1.4825e-01,  1.6166e-01,  1.0603e-01,  9.2590e-02,
        -7.9166e-02, -1.4242e-01,  7.1734e-02,  1.0191e-01, -1.0253e-01,
        -2.2324e-01,  1.0442e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7708, -3.6415, -0.3867,  0.1333, -0.1880, -0.0972, -0.1487, -0.0504,
        -0.2254, -0.2070, -0.0587,  0.4047,  0.0456,  0.2415,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3577e+00,  1.3936e+01,  6.9919e-01, -9.1022e-02,  9.3294e-03,
         9.5480e-02, -3.9506e-01,  2.6279e-01,  1.7084e-01, -4.9162e-01,
        -4.6573e-01,  2.3168e-01, -1.7382e-01,  7.0757e-02,  8.1768e-02,
        -1.4001e-02, -1.5142e-01,  1.6061e-01,  1.6304e-01,  6.7506e-02,
        -1.0986e-01,  9.9183e-02,  2.6414e-02,  4.4732e-02, -1.8351e-02,
        -4.8518e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0856e-01,  9.3202e+00,  1.0682e+00,  3.7108e-01,  1.8345e-01,
        -6.4978e-02,  2.6004e-03,  2.8296e-01, -4.9026e-02,  3.5635e-01,
        -8.4449e-02,  8.4935e-01,  7.1311e-01, -6.0582e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1680e-01,  8.3588e+00, -2.7193e-01,  1.9543e-01, -1.0742e-02,
        -1.7729e-01,  3.5085e-01, -3.7243e-02, -4.4416e-02, -1.2761e-01,
        -1.4637e-01, -1.6354e-01, -1.6149e-01,  3.8774e-01, -2.8463e-01,
        -9.6757e-03, -4.4233e-01,  4.2813e-03, -9.0120e-02, -1.4309e-01,
        -1.2787e-01, -3.5329e-01,  2.5426e-01,  2.3884e-01,  2.6812e-02,
        -1.5165e-01, -1.3924e-01, -2.8008e-01,  4.7126e-03, -1.0090e-01,
        -4.0730e-01, -6.0586e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2088e+00, -1.3363e+01, -5.3534e-01, -7.0523e-01, -5.5122e-01,
         2.0192e-01,  4.6269e-01,  1.7355e-01,  1.8138e-01,  2.6065e-01,
         2.2012e-01,  3.7682e-01, -4.8600e-02,  1.5278e-01,  2.0592e-01,
        -8.9704e-03,  1.8233e-01,  1.9620e-01,  1.1604e-01, -1.4026e-01,
         4.0438e-02, -6.3376e-02, -5.7431e-02, -4.8418e-01,  6.0252e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1311,  3.6823,  0.8978, -0.2142, -0.0749,  0.1650,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3562,  7.3802,  0.6280, -0.0558,  0.4320,  0.0153,  0.0825, -0.0243,
         0.0216,  0.0952, -0.0379,  0.0950,  0.0610, -0.1605, -0.1822, -0.1128,
        -0.0649, -0.0092, -0.0231,  0.0778, -0.0773, -0.0882,  0.3229, -0.0199,
        -0.1461,  0.1945, -0.0110,  0.0144, -0.1706, -0.0475,  0.2912,  0.1761,
         0.1393,  0.0434, -0.0307,  0.3755,  0.0431], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9506e-01, -4.7629e+00, -4.8313e-01, -3.7861e-01, -9.1886e-02,
        -1.4371e-01, -1.8165e-02,  9.4684e-02,  3.4310e-02,  4.8160e-02,
         3.7652e-02, -3.4712e-02, -7.5398e-02,  9.4486e-02, -1.8020e-01,
         3.4953e-02,  1.8574e-01, -2.3179e-01,  9.9134e-02, -7.7602e-02,
         2.3173e-01,  4.7817e-02,  1.0363e-01,  9.2294e-02,  5.5346e-02,
         3.5115e-02, -3.7370e-03, -1.3255e-01,  3.5305e-02,  3.0429e-02,
         4.7816e-02, -1.2718e-02,  2.9181e-03,  7.0984e-02,  1.3832e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.3874, -21.4532,   1.4180,   0.5057,   0.3729,  -0.4975,   0.0475,
          0.1846,   1.0412,   0.1650,   0.0356,  -0.5646,   0.2419,  -1.8904,
          1.5566,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.3204, -10.2024,  -0.0870,   0.2408,   0.0637,   0.0558,  -0.0647,
          0.8717,  -0.2800,   0.2587,  -0.0870,   0.0132,   0.0443,   0.0432,
         -0.0266,  -0.2047,  -0.0109,  -0.4892,  -0.1411,  -0.0669,   0.0170,
          0.0576,   0.1290,   0.1900,   0.2677,   0.0368,   0.0290,   0.0481,
          0.0817,   0.1315,   0.0541,   0.0344,  -0.3540,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2998, -5.4824,  0.0361,  0.0753, -0.1007,  0.2601, -0.2847, -0.0443,
         0.1877,  0.0462, -0.0237, -0.0079,  0.0727, -0.1185, -0.1071,  0.0105,
         0.0610, -0.0373,  0.0842,  0.0061, -0.0105, -0.0081, -0.0256,  0.0108,
        -0.0282,  0.0726, -0.1294, -0.2127,  0.0143,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 9.1649e-01,  3.7836e+00,  7.7917e-02, -5.4129e-02,  3.3618e-02,
        -2.0423e-01, -1.8825e-02, -1.0118e-01,  2.0305e-02, -1.6042e-01,
         3.3222e-02, -5.8957e-02,  2.6108e-03,  6.7069e-02, -2.3729e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9743e-01,  8.3613e+00,  3.7584e-01, -3.6651e-01,  1.1039e-01,
        -5.1158e-02,  1.9278e-01,  1.6902e-01,  2.6897e-01,  9.3798e-03,
        -3.2059e-02, -9.9555e-02, -8.1496e-02, -7.9627e-02, -1.7750e-02,
        -1.3119e-01, -8.9446e-03,  3.0278e-02, -5.9353e-02,  5.3776e-02,
         3.9282e-02, -3.4200e-03,  4.6284e-02,  2.2371e-02,  3.8274e-02,
         1.1102e-01, -2.3414e-02,  1.5509e-01,  1.1824e-01,  7.4128e-02,
         6.6624e-02,  3.4416e-02,  1.6363e-02, -1.2915e-02, -6.9063e-03,
         6.0142e-02,  1.2390e-02, -7.7929e-03,  7.7514e-02,  5.6661e-02,
         5.0251e-02,  4.4198e-02,  6.4757e-02,  1.8069e-02,  4.4309e-03,
         6.6350e-02,  2.1261e-01,  3.6715e-01,  3.5838e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4942, 12.6972,  0.7468, -0.7844,  0.1730, -0.3165, -0.1707, -0.1110,
        -0.2814,  0.2290,  0.0955,  0.0606,  0.0469,  0.1148,  0.0456, -0.1977,
        -0.5091, -0.0596,  0.2439, -0.2033,  0.0618,  0.0366, -0.0954,  0.0844,
         0.0488, -0.2497, -0.2259,  0.2617,  0.0909,  0.0350,  0.0586,  0.2833,
        -0.1128,  0.7901, -0.0440,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8219e-01, -8.7641e+00, -4.7568e-01, -1.5630e-01, -3.2509e-02,
         2.6493e-01, -1.5416e-01, -1.9655e-01, -8.9206e-05,  2.7377e-01,
         2.1760e-01,  3.0679e-01,  2.4107e-01,  5.0265e-02, -6.8577e-02,
         5.3973e-02,  1.0278e-01,  8.0526e-01,  3.8748e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.3467, -12.3865,   0.2947,  -0.4038,  -0.2334,  -0.5206,  -0.2365,
         -0.3068,  -0.5324,  -0.0991,   0.2652,  -0.0419,  -0.1522,   0.7667,
         -0.0863,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0176, 13.6419,  1.4303, -0.1480, -0.1001,  0.0419, -0.9232, -0.1963,
        -0.1555, -0.6297, -0.6237, -0.6345, -0.5795, -0.3575,  0.4293, -0.4601,
        -0.2977, -0.2697,  0.1184,  0.0309, -0.1084, -0.2635,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6101e-01,  9.0963e+00,  2.5322e-01, -6.7136e-02,  6.0298e-02,
         6.3673e-02, -1.1120e-01, -9.4557e-03,  1.0879e-03, -1.4339e-01,
         1.1931e-01, -1.7948e-01, -2.0129e-01, -1.7414e-01,  7.3665e-02,
        -1.5822e-01, -1.2578e-01, -7.0864e-02,  1.0530e-01, -4.1536e-02,
         1.7277e-01, -1.0610e-01,  2.9833e-02,  2.7669e-01, -3.3796e-02,
         6.6534e-02, -8.8309e-02, -1.3620e-01,  1.6440e-01,  3.9202e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.1387, -23.4041,   0.7695,  -1.0217,   1.8799,   0.0396,   0.8711,
         -0.2205,  -0.7181,   0.6951,  -0.2974,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2872, 16.8710, -0.0323, -0.3630,  0.7966, -0.2821,  0.4799, -0.0365,
        -0.0469,  0.2916, -0.0583,  0.3626,  0.0180, -0.2137,  0.1177,  0.3783,
         0.1526,  0.2484,  0.2016, -0.3018,  0.4906, -0.2613,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7486e-02, -2.9460e+01, -3.2059e+00, -1.9611e-01, -6.9278e-01,
         1.9572e-01, -3.1886e-01,  1.1787e-01, -1.3059e-01,  2.8849e-01,
         1.4439e+00,  7.8782e-01,  1.0814e+00, -2.6123e-01,  3.8955e-01,
         6.0800e-01, -2.0994e-01,  7.4646e-01,  3.0086e-02,  2.5197e-01,
         3.1841e-01, -3.7861e-02, -9.5954e-01,  4.1265e-01, -1.0237e-01,
         3.1438e-01,  3.9587e-01,  9.5104e-02, -1.0207e-01,  2.5686e-01,
         6.2159e-01,  7.5719e-01,  2.8488e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7322e-01, -7.7514e+00, -1.3295e+00,  3.7245e-01, -1.3914e-01,
         1.6941e-01, -1.1242e-01,  2.6638e-01, -9.9843e-02, -6.4114e-02,
         6.1705e-02, -2.1275e-01, -1.0931e-01, -1.1396e-01,  2.3769e-01,
        -3.5777e-01,  9.3027e-02,  4.9734e-03,  9.0259e-02,  8.1974e-03,
        -2.9228e-03,  9.5102e-02, -1.6619e-01,  2.2787e-01,  6.1793e-02,
         1.0533e-02, -2.9036e-01,  2.2254e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3193e+00, -1.5031e+01, -2.5512e+00, -5.3605e-01, -3.5289e-01,
         1.6718e-01, -2.0615e-01,  1.9912e-01,  7.9478e-01,  4.4676e-02,
         2.7801e-01, -4.0676e-01, -1.1996e-01, -4.4721e-02, -2.0434e-02,
        -4.1984e-01,  2.4221e-01,  9.2513e-03,  7.0623e-02, -5.3031e-02,
        -8.5028e-02,  4.5787e-02, -2.5231e-01,  2.0649e-01,  3.8574e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 3.5852e-01,  7.6421e+00,  4.1875e-01, -3.0414e-02, -8.5023e-02,
         1.2736e-01, -8.2781e-02, -4.1866e-02, -1.0890e-01, -5.5478e-02,
         7.8126e-02, -1.5265e-01,  2.7897e-02,  1.5240e-03,  5.4547e-02,
        -2.8957e-01,  2.8165e-02, -7.6342e-02, -2.3314e-02, -1.6638e-01,
         1.8813e-01, -1.3354e-02, -1.2142e-01, -2.7677e-02,  3.2626e-01,
         1.3430e-01, -7.8539e-02,  6.0080e-03, -5.0569e-02, -1.1985e-01,
         1.9686e-03, -6.3660e-02,  5.1033e-02,  7.3808e-02,  3.9127e-02,
        -1.0664e-01, -1.2957e-01, -1.0521e-01,  1.3578e-01,  3.2827e-01,
        -4.4019e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.2256, -11.6138,  -1.2891,  -0.5306,  -0.0241,   0.1884,  -0.4031,
          0.4424,   0.3387,   0.1213,   0.1632,  -0.2196,  -0.1447,   0.0298,
         -0.2180,   0.1950,   0.0818,  -0.0473,   0.0702,  -0.0198,  -0.1502,
          0.1155,   0.0900,   0.5440,   0.2392,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4490e-02,  1.0815e+01,  2.8820e-02, -1.3345e-02, -3.8562e-03,
        -5.6703e-02,  3.2528e-01,  5.4470e-01, -1.8422e-01, -3.5871e-01,
         5.0169e-02, -1.8577e-01,  1.4891e-02, -3.3114e-02, -9.7118e-02,
        -5.7248e-02,  1.7387e-01, -2.7099e-01, -7.3323e-01, -1.9416e-01,
         5.4757e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.2377, -19.3950,  -0.8937,   0.3835,   0.9042,  -0.0911,  -0.3488,
          0.0282,   0.6317,   0.4346,   0.3320,   0.2954,   0.0938,   0.1489,
          0.5892,  -0.5366,   1.3766,  -0.1676,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4993e-01, -7.8636e+00,  1.2603e-01, -1.3698e-01, -2.0498e-01,
         3.7159e-01, -1.7511e-01, -1.3797e-01,  8.2661e-02,  3.5254e-01,
        -9.1085e-02, -5.8169e-02, -6.9909e-02,  1.3464e-01, -7.0581e-02,
         1.1888e-01, -9.1617e-02,  4.5436e-01, -3.5349e-01,  5.9150e-02,
         1.8397e-01,  3.0242e-02,  1.4716e-01,  6.8163e-02, -1.2678e-01,
        -3.9654e-01, -8.5401e-02, -1.1527e-01, -8.9551e-03,  9.8329e-02,
        -3.0411e-03, -1.0648e-01,  3.8795e-02, -7.6366e-02,  3.5400e-01,
         6.1403e-02, -5.8750e-02, -2.2881e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2408e-01, -1.5382e+01, -1.4285e+00, -2.1144e-01,  3.2815e-01,
        -6.9120e-02,  1.6707e-01,  2.8635e-03,  3.3088e-01, -4.5815e-02,
        -5.0084e-03,  4.4390e-01, -9.0978e-02,  1.5821e+00,  1.8916e-01,
        -3.6966e-01,  5.7215e-01,  1.6276e-01,  6.7617e-01,  7.5413e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4060, 19.1627,  0.6147,  2.0697,  0.8937, -0.3166,  1.0995,  0.8360,
        -0.5257,  0.4807, -0.7606,  0.0430, -0.7339, -1.6299,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6929e-01,  1.5160e+01,  9.3049e-01,  2.5065e-01,  6.5966e-02,
         3.0896e-01,  3.8736e-01,  2.7008e-02, -6.7937e-02, -7.1974e-02,
        -2.6409e-01, -8.5749e-02,  2.3803e-02,  8.6224e-02,  1.7283e-01,
        -2.2550e-01,  2.9703e-01,  8.0810e-02,  2.5068e-03, -7.1216e-02,
        -1.4444e-01,  2.6596e-01,  3.1691e-02,  2.4032e-01, -1.2417e-01,
         1.1433e-02,  3.0215e-02, -5.5199e-02, -3.5782e-02,  1.7206e-01,
        -1.8817e-02,  8.4388e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2701e+00, -2.5032e+01, -2.2993e-02, -1.5217e+00, -1.2624e+00,
        -4.0410e-01, -4.8166e-01,  9.4960e-01,  7.4941e-01,  1.3887e+00,
         3.9692e-01,  2.5915e-01, -1.8668e-01, -3.3256e-01,  4.9434e-01,
        -1.1183e-03,  3.0592e-01,  6.8487e-01,  4.2186e-01,  1.7829e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1170e-01, -3.6021e+00, -1.4893e-01,  8.0193e-01,  2.1163e-01,
         1.8628e-02,  6.6843e-02, -7.7883e-02,  5.3114e-02,  1.2790e-01,
         5.7266e-02, -5.3234e-02, -2.4137e-02, -2.5843e-02, -3.9253e-02,
         3.3890e-02, -1.6916e-02, -3.3564e-02, -4.3526e-02,  6.4257e-03,
        -5.3442e-03,  3.8814e-02,  8.9823e-03, -8.0848e-02,  1.6765e-01,
         4.2023e-02,  1.5018e-01,  8.4493e-02,  1.8562e-04, -8.6709e-02,
        -9.7319e-02,  1.1367e-01, -3.3513e-02,  3.1824e-02,  1.8728e-02,
         4.4184e-02, -8.9428e-02,  2.1141e-02, -4.3926e-03,  8.5261e-03,
        -4.5368e-02, -1.1003e-01, -1.8414e-01, -7.6509e-02,  6.1407e-02,
         3.5894e-01, -1.3925e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.2654, -16.4151,   0.4774,   0.1114,  -1.0725,   0.0362,   0.0437,
          0.1891,   0.1563,   0.3629,  -0.2564,  -0.1000,   0.7312,   1.2861,
          0.5666,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8101e-01, -1.3493e+01, -1.3320e-01, -1.1865e-01, -1.2580e-01,
        -9.9164e-02,  3.7720e-01,  3.1339e-01,  2.3534e-01,  1.9627e-01,
        -1.7904e-01,  9.3286e-02,  2.2021e-01, -2.3430e-01,  2.2024e-01,
         3.4290e-01,  1.5440e-01, -3.3883e-01,  3.8389e-01, -8.5272e-02,
        -2.0239e-02,  1.5803e-01,  2.7098e-02,  3.3641e-01,  5.9983e-02,
         2.0280e-01, -1.4722e-01,  7.5325e-01,  6.5992e-03, -6.0115e-02,
         1.5416e-02,  6.1803e-02,  4.0309e-02,  6.3029e-02,  1.1031e-01,
        -1.6118e-01,  1.5924e-03,  3.4067e-02, -9.6109e-02,  5.8024e-03,
        -2.3864e-02, -4.4572e-02, -3.5524e-02,  4.2063e-02, -1.7304e-01,
         6.9816e-02, -3.3120e-01, -1.2537e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-1.5579e+00, -1.0118e+01, -5.3444e-01, -4.6263e-01, -1.3645e-01,
         5.1184e-03, -2.8025e-01,  3.9216e-01, -6.8345e-01, -1.2760e-01,
         4.7393e-02, -1.0675e-02,  1.5824e-01, -1.2109e-01,  6.2033e-02,
         1.5277e-01, -4.4978e-03,  3.0156e-01,  2.8597e-01, -1.3341e-01,
         2.1418e-01,  2.4766e-01, -9.1438e-02, -7.9358e-02, -6.5118e-02,
        -6.2959e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4775, 18.1127,  0.5032,  0.3920,  0.6456,  0.6119, -0.1184,  0.1183,
         0.9032, -0.3350,  0.0524, -0.6601, -0.1227,  0.0721, -0.1482,  0.4929,
        -0.7483, -0.6953, -1.0393, -0.1238,  1.6436,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8560e-01,  1.1148e+01,  4.7560e-02, -5.4763e-02, -2.7313e-01,
        -2.1454e-01,  1.1388e-01, -3.5300e-01, -5.3548e-02, -1.0504e-01,
         1.3593e-02,  1.5545e-02, -1.8966e-01, -2.1597e-02, -2.7418e-01,
        -1.4824e-01,  2.8075e-02, -3.0337e-01, -5.4306e-02, -3.4870e-01,
        -3.2289e-01, -1.0356e-01,  1.3771e-01,  1.0340e-02, -1.7448e-01,
        -8.4844e-02, -1.6534e-01, -1.1657e-01, -2.6077e-02, -7.5750e-02,
        -1.1043e-01, -5.2993e-02, -4.6578e-02, -6.7843e-02, -2.1421e-01,
        -6.6094e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2882,  8.5294,  0.6883,  0.0666, -0.1963, -0.0775, -0.1665, -0.0443,
         0.2583,  0.0989, -0.0901,  0.0829, -0.1596, -0.0202, -0.0779,  0.0893,
        -0.0517, -0.2841, -0.2934, -0.7293,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6330e-01, -1.5370e+01, -8.2097e-01,  3.4339e-01, -3.8890e-01,
        -8.0172e-02,  2.0754e-01,  8.5805e-01,  1.7685e-01, -2.8998e-01,
        -6.6323e-02,  5.1391e-01,  9.3456e-03,  8.8636e-02, -6.9558e-02,
         1.7444e-02, -2.3817e-02, -5.1635e-02,  7.0668e-01, -3.0437e-01,
         2.8456e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9806,  9.4816,  0.9390,  0.0773, -0.2967,  0.1647, -0.0165, -0.0970,
         0.1474,  0.0350, -0.1448, -0.2118, -0.2065,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7283e-01, -1.4225e+01, -1.8003e+00, -2.2147e-01,  5.1725e-01,
         1.7516e-01,  7.7769e-01,  2.4793e-01, -6.3224e-02,  5.8307e-02,
        -1.1633e-01,  4.6917e-01,  4.4070e-01,  3.3251e-01,  4.4993e-01,
        -6.1038e-02,  3.1997e-01,  1.1899e-01,  3.7688e-02,  1.1158e-01,
        -2.4181e-01, -1.4281e-01,  2.3813e-01,  6.1154e-02, -5.3782e-02,
         4.3832e-01,  1.5327e-01, -7.0500e-02, -8.7889e-02, -6.5675e-02,
        -2.5111e-01, -6.4482e-02,  3.4362e-03,  3.3739e-02, -2.5324e-01,
        -2.4586e-02, -2.7800e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6573,  5.1745, -0.3121,  0.0182, -0.6251, -0.0685,  0.1596, -0.0096,
         0.0103, -0.1924, -0.1223, -0.0144,  0.3035,  0.1085, -0.0318,  0.0203,
         0.2782,  0.1791,  0.0687,  0.0654,  0.0265,  0.2125,  0.2668,  0.3622,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9540e+00,  8.0359e+00,  8.7900e-01,  1.0534e-01,  1.5143e-01,
        -4.4513e-02,  2.3914e-01,  1.6496e-01,  5.3371e-03, -3.4912e-02,
         1.5455e-01,  1.0574e-01,  1.6917e-01, -3.7003e-02,  7.7001e-02,
         1.0107e-01,  1.3088e-01, -1.1746e-01, -4.8727e-02,  1.5083e-01,
         2.3698e-03,  4.8252e-02,  4.5877e-02,  3.1337e-02, -8.7954e-02,
        -1.0554e-02, -8.9558e-02,  5.6862e-02,  1.9086e-01,  6.3819e-02,
        -1.7277e-01, -2.9467e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0069e-01, -1.6838e+01, -3.0041e-01, -6.8698e-01,  3.4492e-01,
         6.2667e-01,  4.8190e-03,  3.2403e-01, -6.0933e-02,  1.8321e-01,
        -4.9189e-02,  1.1908e-01,  5.8896e-02,  1.4641e-01,  4.8519e-02,
         1.5198e-01,  1.9362e-01, -1.4085e-01, -1.8012e-01,  1.8112e-01,
         2.2662e-01,  3.4984e-01, -1.4002e-01,  3.5380e-02,  4.3166e-01,
         3.0398e-01,  1.8207e-01,  2.4872e-02, -2.5511e-02, -4.3605e-02,
         3.2243e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3183e+00,  1.6094e+01,  1.5041e+00,  6.6262e-01, -3.5946e-01,
         1.0407e-01,  1.1551e+00,  3.2185e-01,  1.9326e-01,  2.4439e-01,
         1.0658e+00,  2.1998e-01, -2.9539e-01,  9.8674e-02,  5.1403e-01,
         4.7268e-01,  4.6490e-01,  4.6712e-01,  7.8422e-02,  5.4657e-01,
        -4.6053e-01,  1.2394e-02,  1.1139e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6081e-01,  6.6061e+00, -1.0893e-01,  2.4886e-01,  8.1871e-03,
         2.4957e-02,  1.9383e-02,  2.5613e-01,  1.4190e-01, -5.0337e-02,
        -9.5807e-02, -1.6508e-01,  4.9495e-03, -8.0618e-02,  1.7091e-01,
         6.0712e-02, -1.1016e-01, -1.1629e-01,  2.3663e-02,  1.5830e-01,
         1.7545e-03,  3.4863e-02, -3.4234e-02, -5.3351e-02,  1.0937e-01,
         3.9823e-02,  9.8659e-02,  9.7994e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.6928e-01,  8.4221e+00, -4.3259e-01, -7.0467e-02,  2.1821e-02,
         3.6346e-01,  3.8070e-01,  2.9916e-01, -3.4926e-01, -1.3004e-01,
         3.8904e-01, -4.9218e-02, -4.3512e-02, -1.2714e-01, -2.0014e-02,
         1.0649e-01,  4.2341e-03,  3.4488e-01, -1.7336e-01,  1.6450e-01,
         1.2064e-01,  1.0632e-01, -3.3080e-01, -1.4517e-02, -3.2908e-01,
         2.2476e-01,  4.0423e-01, -2.5452e-01, -5.6208e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5886e+00,  9.9435e+00,  1.5139e-01, -1.9759e-01,  7.1620e-01,
         2.3465e-01, -3.8929e-01, -2.3901e-01, -1.6633e-01, -3.3548e-01,
        -3.4855e-01, -1.8690e-01, -3.4276e-01, -2.8166e-01, -6.3004e-02,
        -1.2921e-01,  1.9799e-01,  1.8402e-01, -4.7294e-03, -1.8526e-01,
        -1.6792e-01,  2.4584e-01, -2.3681e-01, -8.4456e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2951,  8.7215, -0.2160, -1.0207,  0.1230,  0.1491,  0.0091, -0.0368,
         0.0440, -0.3641, -0.1194,  0.0471, -0.1001, -0.0319, -0.1812, -0.0139,
        -0.0275, -0.0214, -0.4340,  0.2623,  0.1656, -0.0907, -0.2056,  0.4009,
         0.1300, -0.1996, -0.0839,  0.0871, -0.1026, -0.0490,  0.0781, -0.2237,
        -0.0712,  0.4369], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0153e-01,  1.1778e+01, -1.4459e+00, -3.3114e-01, -9.7876e-02,
        -3.7367e-01, -2.4338e-02, -8.0372e-02,  1.7580e-02, -1.1124e-01,
         7.7454e-02, -1.5034e-02,  4.6790e-02, -2.3243e-01, -1.4866e-01,
         1.4821e-01,  4.1724e-02,  2.3693e-02,  1.2102e-03,  1.6214e-01,
        -1.8872e-01,  1.3241e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6609e-01, -4.2815e+00,  7.3320e-02, -2.3480e-01,  5.9929e-01,
         5.0708e-01, -1.4047e-02, -2.3004e-04,  1.9223e-02, -4.1680e-02,
        -1.9501e-01, -1.2961e-02, -3.5487e-02, -5.3844e-02, -7.6932e-03,
         1.8831e-02,  9.3562e-02, -2.5897e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3911,  7.3953,  0.7594,  0.0603, -0.2329,  0.0329,  0.4055,  0.1113,
         0.0615, -0.0910,  0.1125, -0.1801, -0.1451, -0.1231, -0.0153, -0.0906,
        -0.0637, -0.1737,  0.1497,  0.0602,  0.0849, -0.4793, -0.5218,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4210,  7.8633, -0.3320,  0.4458,  0.4917,  0.1889, -0.0741, -0.0516,
        -0.0525, -0.0828, -0.1903, -0.0496,  0.0278,  0.0452, -0.4859, -0.2977,
         0.5269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5185, 14.5747,  1.1289,  0.3789,  0.1900,  0.2685,  0.1727,  0.0722,
        -0.4904,  0.3515,  0.1545,  0.1167,  0.2391, -0.0344,  0.8842, -0.5841,
         0.3518,  1.4276,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6561, 11.8291,  1.2336, -0.1998,  0.1317,  0.6144,  0.0347,  0.0805,
         0.1947,  0.4280,  0.0979, -0.2615,  0.0616,  0.1157, -0.1463, -0.2842,
        -0.3648, -0.2461, -0.0680, -0.0295, -0.0319, -0.0952, -0.1202, -0.2639,
         0.0589,  0.3386,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2157e-01, -7.9018e+00, -2.4717e-01,  3.8684e-01,  2.1189e-01,
         1.3984e-02, -2.1956e-01, -7.7005e-02,  4.5049e-02,  2.1348e-01,
        -1.8503e-01, -8.6418e-02,  7.1298e-02,  8.2426e-02,  8.6213e-02,
         2.3845e-02,  2.1338e-01,  4.4127e-02,  5.8520e-02, -3.5280e-02,
        -4.8292e-02, -7.8948e-02,  1.4991e-01,  1.1047e-02, -2.1125e-02,
         3.0781e-02, -4.7614e-02,  5.5413e-02, -2.6486e-03, -2.7323e-01,
         2.6175e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0180e+00,  9.2712e+00,  6.4857e-01,  2.3820e-02, -2.0733e-01,
         6.0512e-02, -2.3045e-01,  3.4189e-01, -2.0466e-01,  9.1807e-01,
         1.7599e-01, -2.3260e-01,  4.1970e-01,  2.5621e-01,  2.3547e-03,
         4.0174e-01,  1.8533e-01, -8.5798e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1417,  7.8516, -0.1173,  0.1813,  0.3862, -0.0532, -0.2023,  0.4234,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.9748e+00,  1.2227e+01,  3.2052e-01, -2.6214e-01, -8.9334e-02,
         1.2025e-01, -2.1724e-02,  1.2654e-02, -2.6131e-01,  1.1496e-01,
         1.4597e-01, -3.4691e-01, -3.2997e-02, -5.8452e-02,  1.5723e-01,
         2.0818e-01,  5.0756e-02, -5.8545e-02, -5.7275e-02, -1.5078e-01,
        -4.7432e-02, -4.2948e-02, -3.0015e-01,  6.3684e-02, -3.3042e-02,
         1.1875e-03,  7.9118e-02,  2.5004e-01,  1.5773e-01,  1.3319e-01,
         1.1116e-01,  6.5343e-02,  4.0780e-02, -6.5753e-03, -1.8121e-01,
         1.0227e-01,  5.3607e-02,  1.2518e-02, -3.1120e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1695,  5.3597, -0.6047, -0.1251,  0.1744,  0.1221, -0.2167,  0.3180,
         0.0856,  0.1165,  0.0178,  0.1230,  0.2571, -0.0274,  0.1820,  0.1108,
        -0.0607, -0.0659,  0.0193, -0.2787, -0.2175, -0.4716,  0.0433, -0.0279,
        -0.0243, -0.0298, -0.5364, -0.3616,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1416, -8.3172,  0.1161,  0.2299,  0.0454,  0.1627, -0.2594, -0.2563,
        -0.2093, -0.2306,  0.4135, -0.1422, -0.4119,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1475,  5.1144, -0.4205,  0.0151,  0.0092,  0.0955, -0.1270,  0.0158,
        -0.0178, -0.0722, -0.0301, -0.0509, -0.1709, -0.0350, -0.5105,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4360e-01,  5.9969e+00, -1.9574e-01, -5.6882e-01,  2.5292e-01,
        -1.2919e-01, -3.6880e-04, -2.3919e-01, -1.1981e-01, -2.1895e-01,
         9.9768e-02,  1.7121e-01, -2.1966e-01,  9.6983e-02, -1.9722e-01,
        -6.7767e-02,  1.6368e-01,  1.0506e-02, -1.0817e-01, -3.8075e-02,
         2.0405e-01, -8.2419e-02,  1.2780e-01,  2.9825e-03, -2.0914e-01,
         8.2100e-02, -2.3537e-02,  8.8746e-02,  1.6828e-02, -9.5085e-02,
         5.4634e-02, -5.1390e-02,  8.3478e-02,  4.8704e-02,  2.1213e-01,
        -8.8406e-02, -3.2105e-02, -6.5528e-02, -1.4907e-02, -3.7575e-02,
         2.8953e-01, -1.9047e-02,  3.6172e-02, -2.6340e-02, -2.6362e-03,
        -4.7130e-01, -3.6930e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6309e-01,  1.1421e+01,  6.4477e-01, -6.3150e-02,  1.8898e-01,
        -5.9148e-01, -2.7147e-01, -3.4255e-02, -3.9204e-01,  6.4640e-02,
        -2.1373e-01, -3.0785e-01,  5.7466e-02,  4.5468e-02,  2.4328e-04,
        -1.2149e-01,  1.4565e-01, -6.9951e-02,  5.5921e-02,  1.0315e-01,
         3.2754e-02,  2.1109e-03,  5.1468e-02,  5.0465e-02,  1.5668e-01,
         4.1390e-02,  1.3296e-01, -3.5506e-02,  5.1619e-02, -4.6706e-02,
         8.7587e-02, -1.2729e-01, -3.4825e-02,  1.3095e-01, -1.5824e-02,
         4.0436e-02, -1.7919e-02, -3.7585e-02, -3.1382e-02, -2.7398e-02,
         4.1871e-02,  4.0699e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9379, 15.4928,  1.5001,  0.0667, -0.3778, -0.1082,  0.2087,  0.0351,
        -0.4702, -0.2737,  0.3188, -0.4554,  0.3356, -0.0299,  0.5093,  0.2022,
        -0.3062,  0.2038, -0.2154,  0.0736, -0.4982, -0.0874, -0.6018,  0.5580,
        -0.5563,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8312, 10.0273,  1.7417,  0.0551, -0.2302, -0.2019, -0.1813, -0.1051,
        -0.0789, -0.1764,  0.0362,  0.0913,  0.0475,  0.2954, -0.2408, -0.4392,
         0.0809,  0.1074, -0.0687, -0.3704, -0.3606,  0.0369,  0.7272,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6899, 10.4396,  1.0361, -0.3005,  0.2943, -0.1800, -0.2541, -0.6894,
         1.9531,  0.1589,  0.1634, -0.1647,  0.0819,  0.4250,  1.1689, -0.0390,
        -0.8255,  0.0114,  0.6098, -0.2003, -0.2800,  0.5820,  1.9566,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8129, 17.6459, -0.8451, -0.4533, -0.5759, -0.0468,  0.8407, -0.4947,
         1.1527, -0.2119, -0.6618,  0.4555, -1.2270,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9463, 11.1280, -1.6980, -0.0652,  0.1620, -0.2516, -0.1511,  0.3541,
         0.2428,  0.0837, -0.2436,  0.1643, -0.0578,  0.1015,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3422,  6.6323,  0.2358, -0.0473,  0.2746,  0.0698, -0.0522,  0.2159,
         0.0296, -0.2284,  0.0533, -0.1840, -0.0283, -0.0363,  0.2165,  0.0079,
        -0.1310,  0.2017, -0.0312,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-4.2173e-01,  1.6052e+01,  3.2381e-02, -6.3981e-01, -1.5850e-01,
         1.1788e-01, -1.0288e-01, -2.5873e-02, -5.9803e-03,  3.2981e-01,
         1.1547e-01, -2.7227e-01, -1.4747e-01, -1.3903e-01, -4.7434e-02,
        -6.4920e-01,  1.8631e-01, -8.9229e-02, -4.8742e-02,  1.8659e-01,
        -1.2864e-01,  2.1355e-02,  4.8644e-02, -1.2791e-01,  2.4534e-01,
        -6.0173e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3835e-01, -1.0784e+01, -3.5402e-01, -6.3245e-01, -7.6410e-02,
        -1.7123e-01,  1.9880e-01,  2.1169e-02, -2.3920e-02,  4.5210e-01,
        -2.1516e-01, -9.4614e-03,  9.4187e-02, -2.6760e-01, -1.6276e-01,
         2.3624e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4816, 15.9715,  0.4178,  1.4639, -0.3702,  0.6759,  0.5408, -0.4791,
         0.3187, -0.1497, -0.2816, -0.5703, -0.1535, -0.0218,  0.4510, -0.5893,
         0.4817,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7573e+00,  1.5306e+01,  1.3983e+00,  1.2649e-02, -1.9712e-01,
         1.5946e+00,  1.9066e-01,  8.5455e-01, -1.3082e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1780,  7.8864,  0.9357,  0.3285, -0.3414,  0.0370,  0.3566, -0.0243,
         0.5901, -0.2174, -0.0432,  0.0380,  0.0268,  0.1448,  0.1084,  0.0468,
         0.1235,  0.1187,  0.0608,  0.2730,  0.1627,  0.2445, -0.0135,  0.0122,
         0.0377,  0.1904, -0.2388,  0.0307,  0.1872,  0.0637,  0.1021,  0.0748,
        -0.0524, -0.0372, -0.0487, -0.1817,  0.0883,  0.0904, -0.1421,  0.0147,
        -0.1252, -0.0841,  0.0644, -0.1458, -0.0822,  0.8357], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6894e+00,  1.2869e+01,  1.4287e-01,  2.8587e-01, -3.3134e-01,
         6.9150e-04,  2.8361e-01, -5.7830e-01, -6.6220e-01,  3.3034e-02,
        -1.9173e-01, -1.2166e-01,  4.4558e-01, -9.3694e-02, -1.3925e-01,
        -3.7145e-01,  1.3701e-01, -1.4986e-01, -2.3726e-01,  2.1443e-01,
        -4.1883e-02, -1.1516e-01,  2.7608e-01, -1.4909e-01,  2.2807e-01,
         5.0247e-02,  5.0282e-01, -1.9047e-01, -1.0354e-01, -1.4479e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4333, -4.5632, -0.2400,  0.1756,  0.0660,  0.0998, -0.3640,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.1222, -18.5762,  -1.3769,   0.3190,  -0.1566,  -0.3273,  -0.5544,
         -0.4141,  -0.3102,  -0.1371,   0.3169,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3818, -9.0152, -1.1384, -0.2178, -0.0871, -0.2980, -0.0963,  0.1280,
         0.0092,  0.2621,  0.0304, -0.2431,  0.0216,  1.1385,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3391e-01,  1.3990e+01,  1.4732e+00, -3.7935e-01,  2.4144e-01,
         8.6487e-02,  1.1012e-01, -7.2228e-02,  4.8344e-02,  1.4067e-02,
         2.6611e-02, -1.0916e-01, -5.9624e-02, -2.8383e-01,  7.1888e-02,
        -6.5881e-02,  4.9061e-02, -1.3395e-02,  2.9174e-02,  1.6845e-01,
        -1.6155e-01,  3.1721e-01, -2.8838e-01, -1.2157e-02,  3.6119e-02,
         1.1331e-01,  4.3508e-02,  4.6135e-02, -2.7955e-01,  9.3841e-02,
        -5.6126e-01,  1.2240e-01,  9.9960e-03,  4.1410e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2610,  6.7159, -0.0830,  0.1007, -0.1177, -0.0164,  0.1081, -0.0578,
         0.0671,  0.1478, -0.0147, -0.0078, -0.0226,  0.3071, -0.1790, -0.0982,
         0.0148,  0.3804,  0.0290,  0.1025,  0.2341,  0.0080,  0.0432, -0.0780,
        -0.0742, -0.0115, -0.1017,  0.1482,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4802e-01,  1.9800e+01, -1.2563e+00, -8.8822e-01, -7.1992e-01,
         2.6950e-01, -8.2780e-02, -1.1758e-01, -2.9997e-01,  2.5250e-01,
        -2.6327e-01, -4.0654e-02,  1.2585e+00, -2.3833e-01, -5.9891e-01,
         1.8407e-01,  3.8026e-03, -1.9239e-01, -2.6206e-01, -2.3790e-01,
         6.9546e-02, -9.4182e-02,  3.2131e-01,  2.5406e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.1166e+00, -9.7490e+00,  2.3465e-01,  6.0406e-02,  2.3555e-01,
        -4.9463e-02,  3.7805e-01,  5.3581e-01,  4.3450e-01,  6.5181e-02,
         6.5033e-01,  1.0382e-01,  3.0027e-02, -1.6465e-01,  1.1279e-01,
         1.8321e-01,  2.3737e-01,  1.7256e-01,  3.1094e-01,  1.3497e-01,
        -1.7240e-02,  6.9841e-02,  2.9107e-02,  2.2127e-01,  1.0844e-01,
        -9.1389e-02,  5.3363e-02, -9.0436e-03,  1.2620e-01, -1.9079e-01,
        -4.2910e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5738e+00, -9.1195e+00, -4.1679e-01,  2.4650e-01,  1.9422e-01,
        -5.3506e-01,  3.9564e-03, -5.6520e-01, -3.5168e-01, -4.5598e-01,
        -9.1273e-02,  1.7543e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9370, -2.4274,  0.0978, -0.2765,  0.0507,  0.0204,  0.1652,  0.0089,
        -0.0592, -0.0916,  0.0347,  0.2962, -0.3481,  0.2231, -0.4551,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5271e-01, -1.2124e+01, -4.9685e-01, -2.0882e-01, -4.7033e-01,
        -7.0176e-02,  5.4145e-01,  9.8192e-03,  3.1988e-02, -2.0940e-01,
        -4.4123e-01,  4.5579e-01, -2.1903e-01, -1.5602e-01, -9.8424e-02,
        -1.2869e-01, -1.2435e-01, -8.2443e-02,  2.3906e-01,  2.3632e-01,
        -7.0461e-02, -1.0308e-01, -1.9545e-01, -1.8319e-01, -7.2845e-04,
         1.8754e-01,  7.3332e-02,  4.1677e-02,  7.7841e-02,  1.8764e-01,
         6.6151e-02, -3.7571e-02,  1.9739e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0242, -7.5840,  0.3050,  0.3240, -0.3017, -0.2042,  0.1592,  0.1843,
         0.0740,  0.1248, -0.0474, -0.1115,  0.3367,  0.6227, -0.0407,  0.8166,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5694,  9.2923,  0.6964, -0.1643,  0.7747,  0.4234,  1.7141, -0.0516,
        -0.3324,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3782,  4.6714,  0.0285, -0.2068, -0.0663,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  7.4338, -26.7682,   0.6402,   0.6258,  -0.5393,   0.2052,  -0.7851,
         -0.0465,  -0.8554,  -1.5917,  -0.7373,  -1.3731,   0.6344,  -1.1828,
         -0.5720,  -0.5379,  -0.2970,  -0.6527,  -0.6844,  -1.3341,  -0.3588,
         -0.7032,  -0.2371,  -0.5766,  -0.2893,  -0.9135,  -1.6307,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7025, -9.7513, -1.7461,  0.2234,  0.2756,  0.5135, -0.5740,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.2349, -36.3242,  -2.9093,  -0.1051,  -0.5455,   0.5020,  -0.4865,
         -0.0700,   0.4463,   1.7801,  -0.0638,  -0.2445,   0.3522,   0.5670,
          0.6197,   0.3624,  -0.0373,   0.1887,  -0.0915,  -0.0837,  -0.1848,
          0.1312,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2138, -7.6403, -0.1430,  0.0943,  0.7560,  0.0259,  0.0188, -0.3833,
        -0.0149, -0.1377, -0.1317,  0.1754,  0.4480,  0.0961,  0.0116, -0.0544,
        -0.0178, -0.1961,  0.1210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1889e+00, -1.2750e+01, -1.5218e-01, -1.1228e-01, -6.1694e-01,
         2.1161e-02, -1.9092e-01, -1.5888e-01, -3.0175e-01, -1.9142e-01,
        -6.4391e-03, -2.9542e-02,  7.5010e-03,  5.8032e-02,  8.6557e-03,
         1.3781e-01,  2.4827e-01,  3.6670e-02,  6.3732e-02,  9.0806e-03,
        -1.2626e-01, -1.1207e-02, -6.6796e-02, -1.2038e-02,  3.9164e-02,
         1.3600e-02, -3.9673e-02, -1.3905e-01, -1.0531e-01,  6.3196e-03,
        -2.3266e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.2953e+00,  1.9989e+01,  4.7001e+00, -7.3746e-01,  2.9752e-01,
         1.1813e-01, -2.0681e-01,  1.4276e-01,  1.1510e-01, -4.8274e-03,
         1.0976e-01,  3.0194e-01,  2.2859e-01, -7.0874e-01,  2.2571e-02,
         9.5230e-01, -4.8717e-01,  2.5661e-01, -1.1303e+00,  1.3932e-01,
        -3.7189e-01,  2.2599e-01, -3.8505e-01, -6.5202e-01, -2.7781e-01,
         5.4661e-02, -1.2715e-01, -1.1632e+00, -4.0677e-01,  2.5762e-02,
         9.6896e-01, -2.5874e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7673e-01, -1.4412e+01, -2.7977e-01, -1.5324e-01,  4.5815e-01,
        -4.9286e-04, -3.3229e-01, -5.4407e-02, -4.7954e-01, -3.3935e-01,
        -5.3694e-01, -4.2719e-01, -1.5026e-02,  3.3131e-01, -2.4040e-01,
         6.2956e-02,  1.9442e-01, -2.5705e-01,  2.0795e-01, -1.2414e-01,
        -4.9504e-01,  9.5853e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9774e-01,  7.7397e+00, -2.6544e-01,  1.6549e-01, -3.4155e-01,
        -5.6140e-03, -4.9941e-01,  1.4844e-01,  2.3713e-02,  1.3404e-01,
         4.4332e-02, -1.4825e-01, -9.3232e-02,  1.8112e-02,  2.1875e-02,
         3.8424e-02,  8.8954e-03,  1.0033e-01,  3.4607e-02,  8.1250e-02,
         1.2985e-01,  4.2967e-05,  2.5681e-02, -7.5060e-02, -3.7487e-03,
         1.1530e-01,  3.8764e-02, -1.4136e-01,  2.9659e-01,  7.3613e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4042,  9.2523, -0.0224, -0.4285,  0.0186, -0.1414, -0.2063, -0.2085,
        -0.0111, -0.1933, -0.2636, -0.0490, -0.0713,  0.0636, -0.1244, -0.0223,
         0.0306,  0.3014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6384e-01,  2.1245e+01, -3.0114e+00, -9.0651e-01, -2.6677e-01,
        -1.7956e+00,  2.5332e-01,  4.4292e-01, -1.0209e+00, -1.0409e-01,
         1.3676e-02,  1.6768e-01, -4.0171e-02,  1.3175e-01,  3.5962e-01,
        -6.4855e-01, -3.1869e-01, -2.5010e-01, -2.1954e-02, -2.5601e-01,
        -3.5214e-01, -1.2690e-01, -1.1638e+00,  3.7300e-02, -9.9683e-02,
         4.8475e-03,  4.5081e-01,  4.9292e-02, -1.9549e-01, -7.3385e-01,
         1.3028e-01, -2.3049e-01,  2.6338e-01,  1.0887e-01,  3.2077e-01,
        -1.6050e-01, -5.2165e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5383, -8.1090, -0.5698,  0.0422, -0.0376,  0.0913, -0.0322,  0.0599,
         0.0719, -0.0340, -0.0963,  1.2710, -0.0975, -0.1841, -0.0636,  0.0452,
        -0.0439,  0.0104,  0.2040, -0.1331, -0.2071, -0.2274, -0.8559, -0.1305,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0413e-02, -1.3996e+01, -3.8038e-01, -3.8050e-01, -5.0536e-01,
        -3.2788e-02, -8.5968e-02, -7.4674e-02, -5.3375e-01, -1.6057e-01,
         2.5657e-01, -8.2941e-02, -5.9258e-02, -8.0652e-02, -2.6110e-01,
         1.6426e-01,  4.8949e-02,  1.6395e-01,  1.6268e-02,  2.3685e-01,
        -1.6403e-02,  2.7872e-01, -1.5781e-01, -1.1565e-01, -3.0331e-01,
        -4.4693e-01, -1.2157e-01, -2.3272e-01, -4.5276e-02, -5.9110e-02,
        -3.1612e-01,  5.8188e-04, -2.6931e-03,  1.4775e-01, -1.1359e-01,
         1.2580e-01,  3.1253e-02, -1.5740e-02, -1.0503e-01,  9.7139e-02,
         4.5548e-01,  1.4420e-01,  1.3550e-01, -1.2282e-01,  9.7781e-01,
         1.6670e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1722, 10.9327, -0.2875,  0.3697, -0.1179,  0.5180,  0.6096, -0.2427,
        -0.0714, -0.4610,  0.4194,  0.4373,  0.4784,  0.0332,  0.0567,  0.0798,
        -0.0198, -1.2328, -0.2448,  0.1442,  0.0599,  0.3515,  1.7474,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.1977, -21.3981,   0.2308,  -0.2304,  -0.9754,   0.8679,  -0.4491,
          1.2840,   0.1979,   0.5046,  -0.2398,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7241, 12.2994,  0.3544, -0.8849, -0.2596,  0.9230,  0.1341,  0.0289,
        -0.1402, -0.0795, -0.0448, -0.0213,  0.0147,  0.5722,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6587, -8.3971, -1.1447, -0.2531,  0.2170, -0.1246,  0.3514,  0.0765,
         0.0096,  0.3619, -0.1331, -1.4905,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0522e-01,  3.4335e+00, -1.4955e-01, -2.8778e-01, -6.1242e-02,
        -8.2834e-02, -6.2771e-02,  4.1535e-02,  2.7366e-03,  7.0561e-02,
        -1.5132e-02, -2.7161e-02,  3.0689e-02, -2.3989e-02, -3.4678e-03,
        -4.1495e-02, -3.7696e-02, -5.9030e-02, -5.5543e-02, -7.9294e-02,
        -1.3767e-02,  2.5721e-03,  2.0992e-02, -4.0474e-02,  8.3382e-02,
        -2.0253e-01,  9.5106e-02,  2.2024e-02, -4.2173e-02,  1.5878e-02,
        -3.1001e-02,  7.1659e-02,  2.6310e-02,  8.1652e-02,  6.3271e-02,
         5.8092e-04,  2.8436e-02,  2.0918e-02, -5.5936e-03,  3.0494e-02,
        -8.5562e-03,  2.1287e-02,  3.2602e-02,  1.0757e-02, -6.2546e-03,
         2.6292e-02,  2.2673e-02, -1.8988e-02,  9.8781e-04,  8.4328e-03,
         3.4839e-02, -1.3080e-02,  9.8173e-04, -1.4594e-03, -1.2087e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-6.2946e-02, -9.2216e+00, -6.9586e-01, -4.2031e-01, -6.9261e-03,
        -1.0962e-01, -3.2397e-01,  6.3576e-03, -5.5736e-02, -1.8742e-01,
        -2.2182e-03, -9.2732e-04, -2.4835e-01,  1.1595e-02,  3.4950e-01,
         1.0052e-01,  3.1166e-01, -2.5148e-01, -5.2062e-03, -1.6893e-01,
         2.0865e-01, -3.3842e-01,  2.4526e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.0732, -14.7651,  -0.7199,   0.0759,   0.0300,   0.0787,   0.1755,
          0.4602,   0.0818,  -0.7969,   0.4367,  -0.2563,   0.1289,   0.0863,
          0.2848,  -0.1475,   0.7724,  -0.7130,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2178,  5.4172,  0.4032, -0.1225,  0.4021,  0.2189, -0.1348,  0.2615,
         0.2908, -0.2120,  0.1326,  0.1171,  0.2581, -0.1299, -0.1431,  0.1266,
         0.4575, -0.3376,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.9552, -10.6568,   0.2376,   0.0399,  -0.2379,   0.4494,  -0.0595,
          0.7822,   0.3490,   0.1330,  -0.0111,  -0.5312,  -0.4050,   0.2984,
          0.4688,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3380e-01,  1.3844e+01, -2.0733e-01,  1.8049e-01, -2.0422e-01,
         3.3992e-01,  6.2856e-01,  1.0767e-01,  1.0400e+00,  5.0330e-01,
        -4.4238e-02,  8.4325e-02,  1.3614e-01,  8.9149e-03,  3.1866e-01,
         2.2041e-01,  6.3218e-02,  4.1291e-01,  2.6769e-01, -4.9510e-01,
         1.1632e-01, -5.2716e-01,  8.2261e-02,  5.0669e-02,  1.5758e-01,
         6.0745e-01,  1.2600e+00,  1.8797e-01, -8.6792e-01,  8.6206e-01,
         4.7521e-01,  4.9748e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.5125, -10.6555,   1.3498,   0.2850,  -0.6967,   0.0865,  -0.0140,
          0.1812,   0.0253,  -0.0397,  -0.0612,  -0.0911,  -0.2297,   0.4930,
          0.0191,   0.3094,  -0.0635,   0.2545,   0.4212,   0.3277,   0.5363,
          0.5436,   0.4034,  -0.1359,   0.1170,   0.0527,   0.0457,  -0.6299,
         -0.0117,   0.1235,  -0.0454,  -0.2884,  -0.9469,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3785e-01, -1.3812e+01,  1.1492e-01, -1.0645e+00, -8.4985e-01,
        -3.5694e-01, -6.9301e-02,  2.7007e-02,  1.3457e-01, -2.2000e-01,
        -1.1321e-01,  3.6355e-01,  1.2985e-01,  1.4674e-01,  5.1659e-02,
        -1.0030e-02, -1.3938e-02,  2.6468e-02,  2.6541e-01,  5.9516e-02,
         2.2378e-02,  3.6756e-02,  1.8722e-01, -4.8380e-02, -1.0433e-01,
         1.6321e-01, -1.6779e-01, -2.2005e-02, -1.5024e-02, -2.6145e-02,
        -9.9252e-02, -6.2051e-02, -4.5439e-02,  3.3012e-02, -1.8741e-02,
         2.2553e-01,  2.4648e-02,  1.7047e-02, -2.1401e-02, -1.6026e-01,
         1.3035e-02,  3.3080e-03,  4.0779e-02,  1.1932e-01,  6.7291e-02,
        -5.3630e-02,  5.4945e-02,  6.2438e-02, -1.7356e-02, -7.3671e-03,
         2.0048e-01, -6.7403e-02, -4.8511e-02, -2.3481e-02, -1.7653e-02,
        -4.4649e-02,  1.1821e-01, -1.4569e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6380e-01, -1.8387e+01, -1.6919e+00, -1.9259e-01, -6.9158e-02,
        -3.2472e-02, -3.9885e-01,  1.0560e-01,  3.0112e-02, -7.0666e-02,
         2.6048e-02,  1.2715e-01, -1.1364e-01,  7.4762e-01,  1.8917e-01,
        -7.4990e-02,  6.2548e-02,  9.5603e-02, -1.2060e-04, -4.2938e-01,
        -8.0511e-02, -1.0188e-01,  4.0288e-01, -7.1182e-02,  6.7979e-02,
         2.7345e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4030e-01,  8.4290e+00, -3.8904e-02, -2.4399e-01,  6.3874e-02,
         3.6098e-01,  1.4793e-01, -1.2780e-01,  1.8567e-01, -4.4276e-02,
         5.3125e-02, -2.6985e-01, -1.0115e-02, -1.5051e-01,  6.4943e-02,
        -2.3961e-02, -5.0949e-02, -1.3647e-01,  2.2638e-01, -4.7478e-03,
        -2.5505e-02, -4.2283e-01, -8.0408e-02, -1.6107e-02, -6.6353e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4231e-02, -1.1442e+01,  2.1841e-01,  6.9101e-02, -2.4436e-01,
         3.7052e-01, -2.3037e-01, -1.1818e-01, -1.6734e-01, -4.6823e-01,
        -3.2128e-02,  1.0356e-01, -2.7303e-01, -4.7297e-01,  7.3335e-01,
         1.8945e-01,  1.0799e-01, -1.3571e-01, -6.7174e-02, -4.5441e-02,
        -2.1452e-03,  2.7003e-02,  1.4702e-01,  4.3896e-02,  7.8271e-04,
        -2.7610e-02,  5.5424e-02, -5.7445e-03,  3.4456e-02, -1.4674e-01,
         1.4302e-02,  2.3322e-01, -2.0677e-01, -6.9516e-02, -1.9199e-01,
         2.1647e-02,  4.5336e-02, -1.8806e-01, -4.6867e-02,  2.2973e-02,
         2.6750e-02, -7.6976e-02, -2.0382e-01,  1.1565e-01,  4.5779e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9915e+00, -2.0259e+01, -1.8203e+00, -1.3876e+00,  8.2480e-02,
         1.9249e-02, -1.1294e-01, -5.3879e-01,  3.6731e-01,  2.6463e-01,
         6.8012e-01,  2.0035e-01, -3.0094e-01,  9.9635e-02, -1.0003e-01,
         4.9703e-01,  3.6735e-01,  2.3204e-01,  2.7226e-02,  1.6370e-01,
         3.3680e-01,  5.6290e-01,  3.0845e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4229, 16.9791,  0.4181, -0.1745, -0.1167,  0.3204,  0.5101, -0.1373,
         0.0919, -0.0632,  0.0279, -0.5075,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 6.0127e-02,  1.7521e+01,  1.9226e-01, -1.0880e+00, -5.7429e-01,
        -1.5334e-01,  6.2142e-01,  5.4242e-01,  1.1684e+00, -4.8591e-01,
        -2.7157e-01,  1.6959e-02,  1.9672e-01,  2.2041e-02,  1.0753e-01,
         6.0851e-02, -5.8954e-02, -4.8036e-01, -2.2410e-01, -1.0382e-01,
        -1.1194e-01, -1.7463e-01, -2.1018e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.6338e-01, -9.3598e+00,  4.1418e-01,  5.7937e-02, -7.8681e-02,
         1.4692e-01, -4.3097e-02,  2.3092e-01, -7.6142e-03, -1.0881e-01,
        -2.7752e-02,  7.4162e-02,  8.6107e-02, -1.5775e-02, -8.6007e-02,
         5.6959e-02,  8.5336e-02,  3.6374e-01,  1.9797e-01, -4.7723e-02,
        -3.1512e-01,  4.2890e-02, -1.1814e-02,  1.7424e-01,  2.8967e-03,
        -4.1414e-02,  9.0891e-02,  4.8898e-02,  1.8671e-02, -6.9182e-02,
         9.8696e-02,  2.2659e-01,  5.9243e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8801e-02,  1.6094e+01, -1.1438e+00, -6.4781e-02, -2.2462e-01,
         2.5056e-01, -3.5866e-01,  5.3927e-02,  4.3104e-02, -2.8808e-01,
        -5.7921e-01,  1.0683e-01, -5.2763e-03,  5.5037e-01,  4.8557e-01,
        -4.3346e-01, -3.7187e-01, -2.5612e-01, -3.7830e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1159, -8.3806,  0.2819, -0.5458, -0.4693, -0.6428, -0.4069,  0.0784,
        -0.4106,  0.3712,  0.2705, -0.3367, -0.3303,  0.2924,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.6909, -18.1012,   0.8094,   0.4324,  -0.3348,  -0.1676,  -0.2781,
          0.3410,   0.2456,   0.3443,   0.1457,   0.0259,  -0.2611,   0.1747,
          0.0975,   0.0561,   0.4637,   0.1755,   0.1669,   0.0228,  -0.6225,
         -1.0300,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8090e-01,  1.3165e+01,  5.4750e-01, -4.3831e-01, -4.8835e-02,
         2.3478e-01, -6.2215e-02, -5.4134e-02, -2.7852e-02, -3.9581e-01,
        -3.1764e-03, -1.3791e-01, -5.0060e-01, -4.1246e-02, -4.7283e-03,
         7.7868e-02,  2.7549e-01, -9.3001e-02, -3.0051e-02,  3.1680e-01,
        -1.0018e-01, -2.8210e-02, -1.1168e-02,  2.2149e-02, -1.5453e-02,
         1.9920e-02, -1.1054e-01,  2.8165e-01, -1.7236e-01, -4.7060e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9194e-02,  9.7483e+00,  7.4819e-01, -2.0322e-01,  2.4648e-01,
        -2.2884e-01, -6.1699e-01,  3.9823e-02,  1.1156e-01,  9.2044e-03,
        -1.4799e-01, -2.1079e-01,  8.6568e-02, -1.1660e-01, -2.3251e-01,
        -7.3219e-02,  2.2607e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.1874, -15.5000,   0.3608,  -1.9901,   0.7903,   0.0333,   0.2343,
         -0.2793,   0.6276,   0.3771,   0.4873,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2853e+00,  3.0145e+01,  1.1038e+00, -6.7954e-01, -4.4451e-01,
        -2.0236e+00, -4.0899e-01, -5.2881e-01, -3.0523e-03,  1.4201e-01,
        -2.2026e-02,  4.8345e-01, -3.8703e-02,  6.6242e-01, -1.6700e+00,
        -9.8498e-02, -1.0854e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1491e-01,  4.5881e+00, -1.1978e-01,  8.7927e-02,  5.0437e-02,
         9.4651e-02, -8.0081e-02,  5.0011e-02, -1.5296e-02, -1.0790e-01,
        -8.9832e-02,  6.3085e-02, -3.8189e-02, -7.3600e-02, -4.2729e-01,
        -1.7081e-02,  4.1303e-02, -6.5244e-02, -7.8934e-04,  5.3998e-03,
        -3.6214e-02, -3.2628e-02, -4.8280e-02,  2.3057e-02,  9.1884e-02,
         1.4144e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9611, 11.4556,  1.5484, -0.2930, -0.0987,  0.3919,  0.1785,  0.4755,
         0.2628,  0.0707, -0.1008, -0.3355, -0.1058,  0.2440,  0.0191, -0.2622,
        -0.0261,  0.2315, -0.0882, -0.0435, -0.0646, -0.1289,  0.0870, -0.0729,
         0.0831, -0.1539,  0.0487,  0.1055, -0.5774,  0.6038,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.8498, -20.8854,  -1.1732,   2.1519,  -0.5119,   0.1735,  -0.5710,
          0.6993,   0.1292,   1.1675,   0.4934,  -0.9973,  -0.5794,   0.3095,
          0.3959,   0.1872,  -0.1959,   1.5697,  -0.6245,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.1576, -9.3827, -0.7833, -0.1706, -0.3856, -0.3282, -0.5879, -0.0564,
        -0.2414, -0.2078, -0.0228, -0.0708, -0.1587, -0.1112, -0.0933,  0.1942,
        -0.0899,  0.2069,  0.0771, -0.1313,  0.0410,  0.0998, -0.0898,  0.0130,
         0.2622,  0.4964,  0.3410,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1643, -6.5329, -0.2467,  0.0374,  0.3858, -0.2524,  0.0137,  0.0743,
         0.1713, -0.1761, -0.1311,  0.2410, -0.4979, -0.0767,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.1130, -10.9119,  -0.0637,   0.3416,  -0.3418,  -0.4316,  -0.3340,
         -0.1354,  -0.5234,   0.1721,  -0.1269,  -0.2170,   0.0269,  -0.1480,
         -0.0743,   0.4871,   0.0789,  -0.3999,   0.0383,  -0.7104,   0.2582,
         -0.0979,   0.0363,  -0.0693,   0.2514,   0.3275,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7188e-01, -1.0657e+01, -9.5154e-01,  6.3051e-02,  1.2448e-01,
         6.5712e-01, -3.5386e-02,  3.0657e-01,  1.5141e-01, -2.8465e-01,
         1.8043e-01,  6.9549e-01, -8.9097e-02, -1.0041e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6611e-01, -1.7229e+01,  1.8461e+00,  2.7702e-02,  1.8880e-01,
         3.6222e-01,  4.0136e-01,  2.4172e-01, -1.7460e-01,  1.3694e-01,
         2.7695e-01,  2.8130e-01,  1.4554e-02, -4.7307e-01, -4.9502e-01,
         2.4791e-01,  1.6919e-01,  5.2844e-02,  1.5139e-01,  6.7148e-02,
         1.6629e-01,  2.0385e-01, -1.2186e-01,  4.3722e-02,  2.0982e-01,
         3.2354e-01, -6.6180e-02,  1.3225e-01,  1.2294e-01,  9.2036e-02,
         4.7203e-01, -4.2379e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4750e+00, -1.9123e+01, -1.1131e+00, -1.3189e-02, -1.4229e+00,
        -2.0781e-01,  5.5289e-01,  5.7739e-01, -4.0196e-01,  5.5705e-01,
         6.6447e-01,  1.8183e-01,  7.6282e-02,  3.0651e-02,  1.4762e-02,
        -8.1876e-02, -2.8987e-02, -1.0594e-01, -2.2297e-01, -2.3346e-02,
         2.1661e-01, -5.5376e-02,  5.2057e-02, -1.7864e-01,  1.3016e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6506e-01,  8.4389e+00,  2.1502e-01,  5.2170e-03, -1.0430e+00,
        -8.9384e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9704e+00,  2.6355e+01, -3.0379e-01,  4.9093e-01,  3.3679e-02,
        -2.1908e-01,  2.7944e-01, -1.2684e-01,  8.4916e-02,  1.5826e-01,
        -5.2204e-02,  4.2096e-02, -5.6260e-02, -4.3224e-01, -1.6400e-01,
         1.7177e-01,  8.2500e-03, -1.2509e-02, -1.5871e-02,  8.1858e-02,
        -8.8998e-02, -3.8766e-02,  3.6264e-01, -9.3500e-02, -2.0417e-01,
         3.3352e-01,  4.1064e-02,  1.1245e-01, -7.9218e-02, -1.7314e-01,
         2.0279e-01,  2.2452e-01, -7.1363e-02, -7.7582e-02, -8.1005e-02,
         2.1483e-01,  3.6292e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0975e-02, -4.6684e+00, -3.6145e-01, -5.7113e-01, -7.9350e-02,
        -1.0617e-01, -6.6002e-02,  3.5466e-02, -2.5819e-02, -5.3607e-02,
         5.9692e-02, -3.9055e-02, -3.7328e-02,  5.2898e-02, -3.1310e-02,
         1.5739e-01,  1.1836e-01, -2.0835e-01, -2.2774e-02, -1.0966e-02,
         2.4885e-01,  2.5743e-03,  1.0785e-01, -1.4758e-02,  1.8657e-02,
        -1.9598e-02, -2.1961e-02, -6.3695e-02,  4.3730e-02,  5.9447e-02,
         3.4397e-02,  3.3457e-02, -4.2577e-02, -1.1741e-01,  1.2296e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8227e+00, -1.0254e+01,  7.7837e-01,  2.9157e-01, -3.9375e-01,
        -9.2916e-02, -1.5039e-01, -7.7129e-03,  2.0946e-01, -2.8114e-01,
         3.8688e-01, -7.5754e-01,  3.4220e-01, -3.6949e-01,  1.1603e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9148e-01,  2.0311e+01,  1.6651e+00, -9.5156e-01, -2.1075e-01,
         1.9551e-01,  4.1701e-02,  9.7446e-01, -2.2410e-01,  3.3931e-01,
        -5.9696e-02, -1.2164e-01, -1.1492e-01, -3.5019e-02, -1.0613e-01,
         2.0575e-01,  1.5417e-02, -3.6854e-03,  2.0464e-01, -2.8968e-01,
        -2.9337e-01, -1.5553e-01, -3.5775e-02,  8.4324e-02,  2.7487e-01,
        -2.2581e-01, -8.6215e-02, -1.4023e-01, -2.0650e-01, -9.0246e-02,
         5.8618e-02, -2.9566e-01,  3.7073e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6803e-01, -4.1803e+00, -2.7282e-01,  2.9706e-01,  1.1345e-01,
         1.1732e-03, -9.4185e-02,  1.4574e-02,  7.4902e-02,  2.0700e-02,
        -3.9510e-02, -1.4319e-01,  1.8486e-02,  1.3959e-01, -6.1039e-02,
         2.6511e-02, -1.0961e-01,  6.3805e-02,  3.4133e-02, -5.5312e-03,
         3.7087e-02, -3.6935e-02, -7.0745e-02,  2.2883e-02, -1.9590e-02,
        -1.4057e-02,  5.9895e-02, -2.1635e-02,  2.6639e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1144e-01,  1.0453e+01, -1.2391e-01, -2.5839e-01,  2.0612e-01,
        -1.5818e-01,  8.6818e-02,  2.3406e-02, -2.8163e-01, -6.6414e-01,
        -1.0004e-01, -1.5154e-01,  3.7331e-03,  9.0407e-02, -6.5038e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0500e-01, -1.8564e+01,  1.1756e+00, -4.8781e-02, -4.0294e-01,
        -6.9940e-01, -2.0009e-01, -1.3051e-01, -1.9485e-01, -2.3734e-01,
        -2.0404e-02,  1.3739e-01,  1.5126e-01, -1.7416e-01,  1.2664e-02,
        -3.2575e-02,  6.5889e-02, -1.1383e-01,  2.9059e-02,  2.2388e-02,
        -1.1736e-01,  7.5976e-02, -2.5341e-02,  1.5531e-02, -3.6111e-02,
        -3.6506e-02,  5.5270e-02, -2.2281e-01,  7.4571e-02, -5.8993e-02,
         1.3597e-02,  2.5227e-02, -1.2570e-01,  1.5690e-01,  5.9314e-02,
        -4.2822e-02, -7.9789e-02,  6.3080e-02,  6.9921e-03,  9.4297e-02,
        -1.0863e-01, -4.2143e-02, -1.2325e-01,  1.7867e-01,  6.3393e-03,
         2.3360e-01, -4.8676e-02,  5.0213e-02,  4.9474e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2107e-01,  1.4056e+01, -2.6157e+00, -7.8825e-01, -2.4536e-01,
         5.3833e-01,  1.9948e-01,  1.8542e-01, -2.0240e-01,  4.3086e-02,
        -2.3275e-01, -1.2684e-01, -6.3777e-02,  8.6529e-02, -2.0662e-01,
        -2.8706e-01, -2.4118e-01,  3.7601e-02,  1.0803e-01,  2.0343e-03,
        -2.6538e-02, -1.4281e-02, -2.1603e-01,  1.4148e-02,  1.2392e-01,
        -8.9567e-02, -3.7709e-01, -7.2357e-02,  9.1415e-02, -2.0134e-01,
        -1.2752e-01,  3.0367e-02,  2.4576e-02,  3.4830e-01, -6.1141e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.5152, -14.9864,  -0.4951,  -0.2614,  -0.1447,   0.2854,   0.0863,
          0.0192,  -0.1456,   0.2539,   0.4874,   0.2775,   0.1144,   0.1605,
         -0.1500,  -0.3348,  -0.0841,   1.3540,  -0.2329,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2063e-01, -1.2503e+01,  7.2055e-01,  1.8445e-01, -8.8806e-01,
        -3.1962e-01,  2.9936e-02, -2.7393e-01, -8.8701e-01,  4.1709e-01,
         1.0717e-02, -1.3872e-01, -4.6850e-01,  1.4296e+00,  4.3412e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0115e-01,  8.9027e+00, -4.3617e-01, -7.4239e-02, -5.8718e-03,
        -1.3367e-01, -5.0732e-02,  2.8126e-01,  1.8568e-02, -1.0901e-01,
        -4.7122e-02, -4.0170e-03, -1.1276e-01,  8.6327e-02, -8.7815e-02,
        -8.7572e-02, -1.0608e-01, -3.4870e-02, -8.5382e-02,  2.4788e-01,
        -9.9467e-02, -1.8730e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3682,  9.7460, -0.7842,  0.0434,  0.0756, -0.0892, -0.1786,  0.2452,
        -0.2191,  0.1500, -0.0114,  0.1364, -0.0629, -0.0450, -0.2011, -0.0203,
         0.0211,  0.1618,  0.0126, -0.0823, -0.0314,  0.0490,  0.1486,  0.1965,
         0.1453, -0.0539,  0.0747, -0.1364,  0.2026,  0.5978,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.3798, -13.9524,   0.7442,   0.0990,   0.0605,   1.2228,   0.3798,
         -0.2852,   2.0841,  -0.0733,   0.3642,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6741e-01,  1.4946e+01,  6.9794e-01,  1.4715e-01, -9.6648e-02,
         4.4206e-01,  5.1447e-01,  1.0203e-02, -9.6577e-02,  1.1846e-01,
        -2.5079e-01,  3.2846e-02, -9.0170e-02,  5.1452e-02,  1.3793e-01,
         2.1995e-01, -5.1612e-02, -1.7872e-01, -8.1602e-02, -1.0490e-01,
         2.0761e-01, -1.0520e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0333, 20.4280,  0.2651,  0.2191,  0.2461, -0.1497,  0.4888,  0.2167,
        -0.0462,  0.0601, -0.8434, -0.4054, -0.1433, -0.2501,  0.1812, -0.2503,
         0.1519,  0.0234, -0.1222, -0.1221, -0.1780,  0.2492, -0.3831, -0.2432,
        -0.4157, -0.3173,  0.1755, -0.3097, -0.2427, -0.2706, -0.0853,  0.0736,
         0.8908,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7159e-01, -7.7188e+00, -5.9215e-01,  1.4948e-01,  1.5149e-01,
        -1.8285e-01,  1.1276e-01,  3.1577e-01, -5.1210e-02,  1.1669e-02,
         1.4831e-02, -5.6417e-02,  4.2471e-02,  1.7178e-01,  2.4890e-02,
        -1.0897e+00, -1.9106e-02, -4.3060e-02,  6.8915e-03, -1.3672e-01,
         2.1197e-01,  7.4113e-02, -2.9463e-02, -1.7959e-02, -4.9364e-02,
         5.8765e-02,  9.7855e-02,  3.6352e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6504e-01, -1.8614e+01, -2.7799e+00,  6.7232e-01, -2.5206e-01,
        -2.7746e-02,  6.5942e-02,  1.3340e+00,  2.4428e-01,  2.1288e-01,
        -5.7950e-02,  9.7783e-02,  2.5052e-02,  5.4475e-02,  4.8233e-03,
         3.6488e-02, -2.0178e-01, -3.7888e-02, -1.5264e-01, -4.1467e-01,
        -9.2469e-03,  3.3172e-02, -2.3197e-01,  1.2206e+00,  1.5234e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.0612e+00, -1.7933e+01, -1.1979e-01,  4.2233e-01,  4.5495e-01,
         3.7001e-02,  4.5300e-01,  6.1083e-01, -2.8714e-01, -1.6001e-03,
         3.2859e-01, -1.8881e-02,  1.5164e-01,  3.2501e-01,  1.0035e-01,
         6.2704e-01,  1.3999e-01,  6.3227e-02,  7.1905e-02,  1.9001e-01,
        -1.4828e-02,  2.5872e-01,  2.9520e-01,  1.7462e-01,  9.3790e-02,
         6.6599e-02,  3.1317e-01,  1.4881e-01,  2.6240e-01,  3.2478e-01,
        -8.8076e-02,  7.5855e-02,  1.0584e-02,  6.8467e-03,  9.4698e-03,
         3.3778e-02,  1.1095e-02, -1.0584e-01, -1.2964e-01, -2.2593e-01,
         6.9771e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3020,  8.6354,  0.2853, -0.0982,  0.3407,  0.1081,  0.1265, -0.0737,
        -0.0474,  0.0552, -0.0428,  0.1728,  0.2716,  0.0481,  0.2037,  0.0394,
         0.0245, -0.0470, -0.0217,  0.2092,  0.0743, -0.2215,  0.0830, -0.1573,
        -0.0679,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1790, 13.7506,  0.3936,  0.1095,  0.2875,  0.3929, -0.0534,  0.3086,
         0.0456, -0.2105,  0.0812,  0.0902,  0.1600,  0.1168, -0.0558,  0.0178,
         0.3200, -0.1370, -0.2966, -0.1648, -1.2950,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5153e-01,  7.4194e+00, -1.7231e-01,  1.9219e-01, -2.1177e-03,
         9.7604e-02,  4.5627e-01,  6.0278e-02, -2.7102e-01, -1.6022e-01,
        -1.8469e-01, -9.5047e-02,  2.3396e-02, -4.7060e-02, -1.1446e-01,
         5.5983e-02, -4.3114e-01, -7.0257e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0755e+00,  7.4981e+00,  7.2012e-01, -4.0847e-01,  2.1631e-01,
         5.6344e-02,  8.3108e-02,  4.9727e-02, -7.0380e-02,  5.5167e-02,
         1.9943e-01,  1.4968e-01, -1.0575e-01,  2.2138e-02,  4.0615e-02,
         6.3934e-02, -1.4559e-01,  5.2727e-02,  1.9320e-01,  9.5963e-02,
        -3.8473e-02, -3.8790e-02, -1.4360e-01,  7.4527e-02,  7.2082e-03,
         4.3289e-02,  7.9180e-02,  9.3588e-02, -6.6241e-02,  1.3417e-02,
         6.2972e-02, -1.0783e-02,  8.4297e-02,  1.5024e-01, -8.0939e-02,
         6.4734e-02,  3.4097e-01,  1.9766e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0132,  7.4747, -0.0366,  0.1616,  0.1193,  0.2014,  0.1118, -0.3707,
         0.1325,  0.1627, -0.1545, -0.1579,  0.2062, -0.2306, -0.0995,  0.2212,
        -0.1398, -0.0848, -0.0301, -0.4608,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.6764, -13.0460,   0.1671,  -0.5686,  -0.2760,  -0.4953,  -0.4096,
         -0.8540,  -0.2619,   0.1715,   0.7373,  -0.3114,   0.2247,  -0.0674,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1128e-01,  1.3419e+01,  5.0278e-01,  7.7067e-01,  2.7552e-01,
         2.0026e-01,  6.4577e-01,  1.3696e-01,  1.3404e-01, -9.2179e-02,
         2.9890e-01,  2.2966e-02,  1.4473e-01, -3.2617e-02,  8.6157e-02,
        -1.5051e-01, -9.1456e-03, -7.8030e-02, -1.1676e-01, -8.7743e-02,
         8.7464e-02,  5.1961e-01,  5.6960e-02,  5.1114e-02,  2.0058e-01,
         5.7409e-02,  1.4893e-02,  2.3470e-01,  1.6133e-01,  3.6319e-01,
        -6.1242e-02,  3.6162e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5060e+00, -2.0274e+01, -1.4895e-02, -9.6192e-01,  2.1610e-01,
        -5.3910e-01,  4.1012e-01,  1.3042e-01, -5.7989e-01, -1.7464e-02,
         1.6562e-01,  1.0708e-01, -2.2431e-02,  4.5434e-01,  4.4669e-01,
        -5.6755e-02, -1.4298e-01, -1.0093e-01, -6.0391e-01,  2.2473e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7653e-01,  1.0121e+01,  1.1649e+00, -3.2355e-01, -2.3681e-01,
         4.6449e-01,  4.9896e-02, -1.7544e-01, -1.0055e-02, -1.2662e-01,
        -1.7848e-01,  5.1637e-02,  1.5101e-02,  5.6855e-03,  1.6051e-02,
        -1.4300e-01, -4.1378e-02,  1.3803e-02, -1.0163e-01,  1.5447e-02,
         5.9296e-02,  6.5610e-02,  2.1417e-01, -7.5443e-03, -3.0297e-01,
        -5.2377e-02, -2.5779e-01, -1.8270e-01, -7.5202e-02, -6.6146e-02,
         3.7530e-02, -1.0666e-01, -2.9487e-01, -8.2269e-02,  9.5575e-02,
        -2.9076e-02,  7.7141e-02, -9.1992e-02,  4.8168e-03,  2.8916e-03,
         2.4852e-02,  4.8320e-02,  1.0184e-01,  2.4157e-02,  1.0428e-01,
        -5.5203e-01,  5.1977e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9767e+00,  2.0205e+01,  1.5821e-01, -1.2253e+00,  1.2096e+00,
        -4.2148e-01,  2.1512e-01, -1.2619e-01, -1.7421e+00,  3.9500e-01,
        -2.3153e-01,  1.9198e-02,  6.7233e-02,  2.7130e-02, -8.3220e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9217e+00,  2.6967e+01,  3.3838e-01, -1.2210e-01,  4.3044e-02,
        -2.8860e-01, -5.6428e-02,  7.5270e-01,  3.2134e-01, -1.5464e-01,
         3.2547e-01, -2.6401e-01, -1.0488e-01,  5.7164e-01,  2.1946e-01,
         1.1452e-01,  9.5859e-02,  8.1802e-02,  6.8336e-02, -1.6491e-02,
         3.8443e-02,  2.2292e-01,  3.2851e-01,  2.8093e-01, -1.1978e-01,
        -3.5837e-01,  6.0419e-01,  2.9257e-01, -1.5037e-01, -1.2096e-01,
         1.6224e-01,  3.9534e-02,  3.0128e-01,  6.3966e-02,  1.5802e-01,
         6.2992e-01,  1.2338e-01,  3.4985e-02, -3.0307e-01,  1.9327e-01,
         2.2508e-01,  1.1932e-01, -4.2606e-01, -1.9402e-01, -2.8143e-01,
         1.7505e-01,  3.3546e-01,  1.2377e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 1.2542e-01, -9.8658e+00,  7.7433e-02, -3.9297e-01,  3.1776e-01,
         8.3120e-02,  9.4064e-03, -2.3660e-01,  1.6368e-01, -1.2098e-01,
        -4.4420e-02,  2.5582e-01,  1.2432e-01,  1.9330e-03,  5.7808e-02,
        -3.0109e-02,  7.4226e-02,  1.0192e-01,  5.8229e-02, -3.5917e-02,
        -1.7918e-02,  1.7220e-01,  1.1843e-01, -2.5432e-01, -1.1705e-01,
         6.9664e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3891e-02, -9.7871e+00, -3.3327e-01, -1.0030e-01,  4.6346e-02,
         1.2210e-01,  7.6352e-02,  1.1734e-01,  1.7015e-01,  3.6258e-02,
        -1.6539e-01,  2.2938e-01,  3.4931e-02, -5.1078e-03,  4.2270e-02,
        -1.3266e-01,  7.0048e-02, -1.0706e-01,  3.3919e-01, -7.9219e-02,
         5.6128e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9953e-01,  8.6084e+00, -4.9631e-01,  1.1488e+00,  4.7252e-01,
        -3.4828e-01, -1.6124e-02, -8.5023e-02,  8.7619e-02, -8.1590e-02,
        -2.2993e-02,  5.7315e-02, -1.8419e-01,  2.5447e-02, -1.2484e-01,
        -1.2135e-01,  2.8772e-01, -2.1515e-01,  6.3235e-02, -1.3018e-01,
         5.7341e-02, -1.7398e-02, -2.3090e-01,  9.4987e-02, -1.4894e-02,
        -1.2075e-02, -6.1356e-03,  6.6348e-02, -3.9866e-02, -3.5716e-02,
        -2.1265e-01,  6.9815e-02, -4.1854e-03, -1.2418e-02, -2.5207e-01,
        -4.2336e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4525e-01,  1.3272e+01,  1.5426e-01,  6.0256e-03, -3.4692e-01,
         5.6452e-01,  1.3891e-01, -4.3443e-01,  3.6355e-01, -4.3063e-02,
        -1.1963e+00, -2.3098e-01, -2.0877e-01,  1.6346e-01, -6.2273e-03,
        -1.7950e-01, -1.1070e-01, -2.3046e-01,  2.0413e-01, -1.3423e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8034, 12.3327,  1.2263,  0.4159,  0.2696,  0.0701, -0.9538, -0.0891,
        -0.0669,  0.5743,  0.6436,  0.2087, -0.3194, -0.0507,  0.1562,  0.1477,
         0.2020, -0.1448, -0.3874,  0.3038, -0.7030,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6865e+00,  1.2676e+01,  1.3743e+00,  1.9521e-01, -1.3597e-01,
         7.8364e-02, -1.6765e-02, -5.2407e-01,  6.2094e-02, -8.2650e-02,
         3.0509e-01, -3.6527e-03,  6.7353e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6997e-01,  1.4973e+01,  6.5579e-01,  7.4897e-02, -4.6172e-01,
        -2.6311e-01, -1.6563e-01, -2.8224e-02,  2.2247e-01,  5.9484e-02,
         6.5417e-01, -2.5628e-01,  4.0868e-02, -2.4633e-01,  5.6906e-02,
         9.7789e-02, -3.7216e-01, -1.0421e-01, -1.9435e-01,  1.0029e-02,
        -1.9422e-01, -2.3979e-02, -2.0717e-01, -1.2684e-01, -1.1484e-01,
         1.6299e-01, -1.3748e-01,  1.0720e-01,  1.0026e-01,  1.0396e-01,
         7.5813e-02,  3.5707e-01,  9.4786e-02,  5.5791e-02,  2.7878e-02,
         6.9431e-01,  5.1353e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8755e-01,  9.8746e+00,  8.1348e-01,  5.6308e-01,  1.4169e-02,
         1.2195e-01, -8.7450e-02,  1.8346e-02,  2.7471e-01, -1.5119e-01,
         2.1103e-02,  2.0764e-02,  4.8305e-01, -1.0651e-01,  7.3746e-02,
         1.2453e-01,  2.1747e-01,  1.7166e-01, -3.6410e-02,  3.5035e-03,
        -2.3484e-01, -9.4223e-02, -1.7987e-01,  3.0224e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1977e+00,  1.1650e+01,  1.9931e+00,  8.6714e-01, -1.2566e-01,
         2.5495e-01, -9.7188e-02,  1.5420e-02, -1.2432e-01, -8.7757e-03,
         2.5871e-02,  2.4165e-01,  4.3574e-01,  3.2803e-01, -2.3711e-01,
        -3.5543e-01,  2.0296e-01, -1.2502e-02, -1.5355e-01,  7.8225e-02,
         5.8743e-02,  4.6408e-02, -5.4325e-02, -7.4372e-02, -1.0233e-01,
        -1.2743e-01, -6.4067e-03, -9.4939e-02,  4.6410e-02,  3.9950e-02,
         5.9031e-01,  3.0211e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5788, 20.9475,  0.1117, -0.1595, -0.6578,  0.0500, -0.0960, -1.1071,
         0.1593,  0.4422, -0.1015, -0.2881, -0.1783, -0.2218, -0.0690,  0.0739,
        -0.5402, -0.3301, -0.1653,  0.1681,  0.0990, -0.4812, -0.0658,  0.2200,
        -0.1242, -0.1644, -0.4877, -0.1037, -0.2316,  0.2973, -0.6193,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7249e+00, -1.8579e+01, -1.9264e-01,  4.3204e-01,  5.3000e-01,
         1.1798e+00,  7.3107e-02,  2.2692e-01,  2.9065e-01,  1.9458e-01,
        -7.9436e-01,  1.0559e-02,  1.6978e-01,  9.8144e-01,  7.6388e-02,
         1.0247e-02, -7.6196e-02,  1.0649e-01,  1.0696e-01, -3.2936e-01,
         5.6492e-01, -6.2996e-01, -2.0308e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5299, 10.1719, -0.7355,  0.0579, -0.0869,  0.0965, -0.2357,  0.2274,
        -0.0620, -0.0192,  0.0842, -0.0144, -0.2223, -0.3201, -0.0229, -0.3203,
        -0.2356, -0.0470, -0.0363,  0.0318, -0.0269, -0.1550, -0.0574,  0.1212,
         0.0659, -0.0837, -0.5109,  0.2447,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-5.4261e-01,  7.8551e+00, -5.0476e-01,  7.7330e-03,  1.2004e-01,
         1.1951e-01,  6.5492e-02,  6.2352e-02, -1.0717e-01, -1.1035e-01,
         1.9435e-01, -2.8610e-02,  1.6164e-02,  1.2944e-02, -2.8921e-03,
        -5.5235e-02, -1.3770e-02,  1.6935e-01, -8.9151e-02, -1.9597e-01,
         2.5629e-01,  5.0531e-02,  2.8565e-03,  4.5935e-02, -5.4641e-02,
         1.5390e-01,  2.0640e-01, -5.6941e-02, -5.5373e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4959, 14.6415, -0.2486,  0.4531, -0.5858,  0.0160,  0.3236, -0.3296,
        -0.4539, -0.0255, -0.3521,  0.2268, -0.0665, -0.3682, -0.1490,  0.0302,
        -0.1750, -0.1137,  0.2284, -0.1352, -0.2269, -0.4672, -1.4494,  0.5104,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4833, 11.9938,  0.7377, -0.1887,  0.3358, -0.3634,  0.4453,  0.1113,
         0.2457, -0.3286, -0.2504,  0.3838, -0.1367, -0.2371,  0.1153, -0.1213,
        -0.2458,  0.0430, -0.1515, -0.1785,  0.1799, -0.0526, -0.1173,  0.2374,
        -0.1144, -0.0276, -0.2176,  0.0642, -0.1897,  0.0614, -0.2893, -0.0522,
         0.1199, -0.3092], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4485e-02, -8.6754e+00, -7.9206e-01,  2.8602e-01, -1.1948e-01,
        -2.2835e-01, -4.7886e-01, -2.4632e-01, -1.9123e-01, -7.9539e-02,
        -1.0560e-01, -6.4302e-02,  7.0019e-03,  5.6662e-02, -5.7431e-02,
        -8.9926e-02, -1.4544e-01, -1.0156e-01, -4.5873e-02, -5.0976e-01,
         1.4634e-01,  3.8200e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0121, -3.9481, -0.3001, -0.0580,  0.0920, -0.0309, -0.0066, -0.0477,
        -0.0828,  0.0604,  0.0525, -0.0312,  0.0231, -0.0547, -0.0801, -0.1268,
        -0.0144, -0.1371,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3289e-01,  9.3166e+00,  5.4397e-01, -2.9694e-01, -1.9696e-01,
         4.5823e-01,  5.8018e-02,  4.0253e-01,  2.1796e-01,  1.6151e-02,
         3.6498e-01,  1.4065e-01, -7.7953e-02, -1.3444e-01, -6.4294e-02,
        -2.8790e-01,  1.4221e-02, -8.0764e-02,  3.2397e-02,  5.7324e-02,
        -3.7905e-03, -8.7914e-02,  5.4306e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6490e+00,  1.3473e+01,  6.0978e-01,  6.5245e-02,  4.8191e-01,
        -1.2897e-01, -5.1668e-01, -9.7047e-02, -4.4224e-01, -1.7758e-02,
         4.1502e-01,  3.7680e-01, -1.2836e-02, -1.4025e-01, -8.5551e-01,
        -3.4157e-02, -3.0902e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.2746, -25.6107,   1.1542,  -0.1659,  -0.9375,  -0.2127,   0.1647,
          0.0612,  -0.1054,   0.2773,  -0.3476,   0.2141,  -0.3203,  -0.3217,
         -0.5601,  -0.5536,   0.8602,  -0.6181,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5478e-01, -1.0557e+01,  1.8857e-01, -1.4178e-01, -2.7911e-02,
        -1.2540e-02, -8.5145e-03,  1.4909e-02,  1.0956e-01, -5.4618e-01,
        -1.8364e-02,  5.4756e-02,  1.1305e-01, -2.0735e-01,  1.4686e-02,
         4.4692e-02,  1.1539e-01,  2.1115e-01,  4.1892e-02,  1.9091e-01,
         1.1955e-01, -6.3310e-02,  1.4251e-01,  1.7377e-01,  1.7275e-01,
         3.4754e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5060e-01, -7.8687e+00, -1.1801e+00,  4.4740e-01, -5.6186e-02,
         2.2312e-02,  5.8516e-02,  1.3780e-01,  2.4412e-01,  1.1861e-01,
         1.6718e-01,  1.1587e-01,  5.5713e-02,  4.8633e-02,  9.8833e-02,
         5.6367e-03, -6.2748e-02,  1.3375e-02, -1.3735e-02,  1.0822e-01,
         1.7536e-01,  5.1009e-02,  5.4548e-02,  2.8838e-02, -4.7568e-03,
         9.6065e-02,  1.4988e-01, -4.8382e-02, -3.3574e-02, -3.0461e-01,
         3.4886e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4456e+00,  2.5053e+01, -1.4942e-02, -6.2512e-02, -6.3241e-02,
         8.6449e-01,  4.2664e-01,  2.7796e-01, -1.4466e-01,  1.5534e-01,
        -2.1590e-01, -5.5960e-01, -1.0170e+00, -1.5792e-01, -9.2376e-02,
        -5.0124e-01, -9.0986e-01, -2.0101e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1620, 13.3620, -0.6802, -0.8086,  0.8142,  0.5936, -0.0278, -0.4147,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.4937e+00,  1.7461e+01,  5.9838e-01, -3.5447e-01, -4.4040e-01,
         1.9814e-01, -2.4196e-01, -7.6438e-02, -1.7854e-01, -3.5800e-02,
        -1.5938e-01, -4.2227e-02, -1.5714e-01, -6.1516e-03, -1.4551e-01,
         1.9621e-01,  5.7685e-02, -1.8633e-01, -5.6183e-02,  2.2195e-01,
         3.3978e-02, -1.8760e-01, -6.9959e-01, -2.3912e-01, -2.8911e-01,
         3.6835e-02, -1.4654e-01,  1.7008e-01, -2.5461e-01, -6.7494e-02,
         5.5465e-01,  1.6779e-01,  1.3554e-01,  1.9270e-01, -1.0974e-01,
         1.0165e-01, -7.9897e-02,  3.5604e-02, -7.8934e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9680e-01,  5.9845e+00, -1.0075e-01, -2.7562e-01,  1.4015e-01,
         2.1322e-01, -2.0913e-02,  3.8947e-02,  3.8419e-01,  1.7358e-01,
         1.7311e-01,  3.7785e-01,  1.0415e-01,  2.3931e-01,  3.4289e-01,
         4.8208e-02,  8.1302e-02, -5.0046e-02,  2.7066e-02,  9.1200e-02,
         2.9873e-01, -2.6073e-01, -2.6327e-02,  7.9241e-03, -1.2886e-02,
        -3.6191e-03,  3.2277e-01, -2.1512e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3242, -5.9724,  0.1575, -0.4505, -0.0294,  0.1020, -0.0244,  0.0275,
        -0.0459, -0.1504,  0.0193, -0.1293,  0.6132,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3014e-01,  1.0174e+01, -5.8702e-01,  4.0973e-02,  6.1788e-02,
         9.8445e-02,  4.0592e-02,  3.2961e-01, -5.3020e-02,  1.3032e-01,
        -1.8817e-01, -2.9724e-01,  1.7103e-02,  1.8647e-01,  6.5895e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8102e-02,  6.4748e+00,  2.4525e-01, -4.3578e-01, -4.3982e-03,
         1.0883e-01,  8.2152e-02, -8.5747e-02,  1.8629e-02, -6.2708e-02,
         5.2681e-02,  1.1372e-02,  5.6239e-02,  2.0536e-01, -1.5442e-02,
        -2.0938e-03,  8.6738e-03,  1.8860e-02, -1.4728e-02, -1.8460e-01,
        -1.0446e-01, -3.9141e-02,  5.4018e-02, -3.5470e-03, -3.3937e-02,
         5.1437e-02,  1.5487e-02,  6.7155e-03,  4.2061e-03, -3.4822e-02,
        -6.8963e-02, -3.2730e-02, -8.1807e-02,  1.3906e-01,  1.0686e-01,
         6.4707e-02, -2.1588e-02, -2.8127e-02,  1.4791e-02, -2.5256e-02,
         4.3606e-02, -8.3799e-03,  3.6198e-03, -2.2212e-02, -8.3860e-02,
        -1.8864e-01, -2.5705e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9841e-01, -1.7772e+01,  1.9174e-01,  3.6396e-02,  5.9937e-02,
         5.3523e-01,  3.9070e-01, -1.6400e-01,  1.0549e-01,  6.6111e-01,
         2.6717e-01,  3.2406e-01, -1.0181e-01,  7.1048e-02, -5.4435e-02,
        -2.4899e-02, -7.7565e-02, -9.4322e-02, -2.0507e-01, -5.6684e-02,
        -7.8098e-03,  3.4694e-02, -1.3257e-01, -6.1981e-02, -5.7980e-02,
        -6.9707e-02, -3.8795e-01, -6.4184e-02, -4.3818e-02, -4.0164e-02,
        -1.6308e-01,  1.0880e-01,  9.6644e-02, -1.0359e-01, -1.2123e-01,
        -1.0778e-01, -5.2533e-03,  8.5622e-03,  3.9087e-02,  1.0775e-01,
         4.7727e-02,  6.7162e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1167e+00,  1.6867e+01,  7.6042e-01, -8.8840e-01,  4.9879e-02,
         4.7574e-02,  6.2755e-01,  1.3840e-01,  1.6899e-01, -2.5732e-02,
        -1.1596e-01, -3.1980e-01,  7.4219e-02,  3.3552e-03,  1.9022e-01,
        -2.8250e-01,  9.5833e-03,  8.4042e-02,  1.3593e-01,  6.3343e-02,
        -3.6117e-01,  1.2845e-02, -8.6254e-02,  2.6713e-01, -1.1821e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0354e+00,  1.5166e+01,  3.6010e-01, -2.4344e-02,  8.7920e-02,
         4.9266e-02, -1.1982e-01, -8.6245e-03,  3.7754e-01,  7.7957e-02,
         2.1518e-01, -6.7721e-02,  2.3491e-01, -3.5533e-01, -7.0579e-02,
        -2.3858e-01, -2.1002e-01,  2.7096e-01, -2.0634e-01, -2.5912e+00,
         6.5520e-01,  6.4192e-01, -1.1141e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1063, 16.9908,  0.8009, -0.2751,  0.6440, -0.1938, -0.4195, -0.2720,
        -0.2051,  0.6546,  0.7771,  0.0467, -0.2011,  0.0599, -0.4845, -0.0354,
        -0.1576,  0.7130, -0.1005, -0.3016,  0.1419, -0.2697, -0.1289,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.0613, -14.2961,   0.4154,   0.1074,   0.0480,  -0.2944,  -0.2842,
         -0.0461,  -0.3764,   0.3853,   0.7925,   0.3706,  -1.4213,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5126, 15.6010,  0.4404, -0.4414, -0.2341, -0.4015, -0.0289, -0.7816,
         0.1374,  0.1815,  0.1052, -0.3302, -1.0744,  0.7274,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4058,  8.0922,  0.0758, -0.0364,  0.2567, -0.1190, -0.0293, -0.0460,
        -0.0271, -0.1622, -0.2243, -0.0702,  0.0629, -0.0661,  0.1446, -0.0703,
        -0.2995, -0.0249, -0.2308,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.8239, 27.6939, -2.5159, -0.6300, -0.1495, -0.3185,  0.0540, -0.1034,
        -0.0733,  0.2171,  0.0928, -0.0465, -0.1499, -0.2297, -0.0947, -0.4246,
        -0.1258, -0.1777, -0.1638,  0.8662, -0.1249,  0.0787, -0.1430, -0.1426,
         0.1513, -2.3331,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2767, -6.3279, -0.4536,  0.1903,  0.1039,  0.3176, -0.0272, -0.0829,
        -0.0688,  0.4210, -0.0074,  0.0877, -0.2628, -0.0748,  0.2306, -0.1579,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2817e+00,  2.1028e+01,  6.3423e-01,  3.9983e-01,  2.6585e-01,
         7.1646e-01,  5.8333e-01,  3.7901e-01,  6.5237e-01,  5.1230e-01,
        -4.4827e-01, -7.8021e-01, -5.4022e-03, -6.4003e-02,  1.0537e+00,
         4.0812e-01,  6.0696e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0199, 17.1915,  2.0053, -1.5159, -0.2095, -0.4508,  0.5174, -0.2765,
         0.4701,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3492e+00, -1.5109e+01, -8.3678e-01, -5.5712e-01,  1.3980e-01,
        -1.0953e-02,  5.7934e-03, -1.8684e-01, -2.8514e-01, -1.1183e-01,
        -2.9260e-01,  1.7836e-01,  5.1252e-02,  2.5150e-01, -2.2091e-01,
        -4.2879e-01,  2.8242e-01, -8.7059e-02,  5.0159e-03, -8.0040e-02,
        -2.2209e-02, -1.2182e-01, -2.1981e-01,  7.9535e-02, -4.1410e-03,
         2.3688e-01, -1.2522e-01,  6.6526e-02, -3.9386e-02,  9.5271e-02,
         1.0039e-01, -1.4391e-02,  1.5607e-02,  6.4720e-02, -1.0057e-01,
         3.2747e-01,  1.4620e-01, -3.3276e-02,  1.8276e-01,  1.1964e-01,
         2.5615e-01,  1.5097e-01,  1.3645e-01,  1.4227e-01, -1.6009e-02,
        -6.3886e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3961e+00,  1.6737e+01, -1.5224e+00,  8.5058e-02,  2.0543e-01,
        -2.8031e-01, -9.7558e-01, -1.1210e-01,  5.5377e-02,  1.0558e-01,
        -1.3723e-01, -1.2115e-01, -9.9042e-02,  4.6121e-02,  2.3139e-01,
         3.0068e-02,  8.5890e-02,  9.5953e-03, -1.1784e-02,  2.5619e-01,
        -2.9314e-01, -7.4093e-02, -5.8614e-01, -3.7530e-01,  4.7582e-02,
        -1.2365e-01,  1.7328e-01,  5.1121e-01, -9.1464e-02, -3.6441e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0688, -12.2762,  -0.3272,  -0.4059,  -0.4581,  -0.2386,   0.6535,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3288e-01,  1.6429e+01, -8.0143e-01,  1.4883e-01, -3.9056e-03,
         2.5084e-01, -3.1152e-02,  7.9774e-01, -8.6005e-02,  2.9157e-02,
        -1.1096e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.6322, -13.2587,  -0.3305,  -0.3538,  -0.5333,   0.0153,  -0.8435,
          0.3468,  -0.2435,  -0.2168,  -0.0385,   0.3231,   0.4946,   0.8731,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6552e-01, -1.1571e+01, -6.8514e-01, -2.6542e-01, -3.1493e-01,
         3.7567e-02, -7.7704e-02, -5.2621e-02,  2.0968e-01,  2.2896e-01,
        -9.2549e-02, -1.1549e-01, -4.3884e-02,  1.7159e-01, -2.7661e-02,
         2.0618e-01, -2.9160e-02, -1.1598e-01,  3.0339e-02, -2.4310e-01,
         1.0178e-01, -1.6392e-01,  8.0244e-02, -2.2429e-02,  1.6326e-01,
         5.0549e-02,  4.3619e-03, -1.1159e-01, -1.4175e-03, -1.9693e-01,
         3.8503e-01,  1.1596e-01,  1.6015e-01, -6.9904e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.1359, -10.0209,   0.4745,   0.3187,  -0.1314,  -0.2328,  -0.2628,
         -0.1508,  -0.0967,  -0.0914,   0.0329,   0.3072,   0.1774,  -0.0484,
         -0.0676,   0.1113,   0.0378,   0.5278,   0.0735,  -0.0878,   0.4406,
          0.1480,   0.4811,  -0.0892,  -0.2250,  -0.1556,   0.4587,   0.6840,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.7543, -32.5798,  -1.1280,   1.5189,   0.3520,   0.2668,   0.3377,
          0.4704,   0.4301,  -0.0810,   0.9468,   1.0756,   0.9931,   0.7607,
         -0.0421,   0.3711,  -0.2732,  -0.2685,  -0.4498,   0.1827,   0.1968,
          0.3114,  -0.5341,  -0.6502,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.8298e-01, -1.8363e+01, -4.9449e-01, -2.3244e-01, -2.0739e-01,
         3.5510e-01,  1.4572e-01, -2.7698e-03, -5.8112e-02, -2.2281e-01,
         7.9838e-01,  2.0215e-01, -4.7704e-01, -1.2684e-02, -2.1736e-01,
         1.2711e-01,  9.1258e-02,  2.9126e-01,  3.5416e-02,  1.8285e-01,
         3.6351e-04, -4.0053e-02,  2.8291e-01,  9.8066e-02, -3.4073e-02,
         4.7252e-01,  1.1990e-01, -5.6334e-02,  6.3470e-02,  6.1168e-01,
         7.5997e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0302e+00,  1.3855e+01,  1.4173e-01, -2.3997e-01,  4.4721e-01,
         6.1620e-03,  1.0466e-01, -2.5590e-01,  8.7418e-01, -1.2392e-01,
         2.8747e-01,  5.5744e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.5946, -13.5187,  -0.2268,   0.6789,   0.2038,  -0.4584,  -0.3647,
         -0.0633,  -0.3482,  -0.3380,   0.8931,  -0.3725,  -1.0114,  -0.4011,
         -0.2969,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4577e-02, -1.4295e+01, -1.5457e+00, -3.7437e-01,  4.7258e-01,
        -2.8921e-01, -2.1557e-02, -8.7008e-02, -2.9754e-01,  2.6612e-01,
        -7.9770e-02,  2.5110e-01,  7.9428e-02,  1.6772e-01,  1.1744e-01,
        -7.2306e-02,  3.1269e-01,  3.4127e-01,  9.5963e-02,  4.2703e-01,
         8.2131e-02,  5.3823e-02,  5.1449e-02,  2.4808e-02, -1.2995e-01,
         5.5629e-02,  7.2706e-03, -2.5076e-02,  4.5561e-02, -1.8680e-01,
         1.8214e-01,  1.6735e-01, -6.0798e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2878, 19.1528,  1.5965, -1.4672, -0.2490, -0.0239, -0.6368, -0.7390,
         0.1614, -0.4767, -0.4632,  0.1188, -0.5088, -0.9147,  1.1838, -0.3101,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3242, -9.7264,  1.0583,  1.1026,  0.2145, -0.3645, -0.2468,  0.0461,
        -0.1167,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7382,  8.9389, -1.2186,  0.7181,  0.8872,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5919e-03, -2.9231e+01,  2.1331e-01, -1.9089e-01,  1.5521e-01,
        -1.3804e-01,  1.3730e-01, -3.8495e-01, -3.4941e-01,  8.4547e-01,
         2.1287e-01, -5.1287e-01,  1.1828e+00, -5.8563e-01,  6.5523e-01,
        -5.3285e-02,  6.9189e-02,  1.5356e-01, -2.8726e-01,  5.5021e-02,
        -2.6736e-01, -5.4610e-02, -1.2802e-01,  2.0971e-01,  3.2451e-03,
        -1.1710e+00, -4.8684e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0081e+00,  2.8806e+01,  7.8029e-01, -6.3022e-01, -6.7235e-01,
        -2.0532e+00,  4.7537e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.0949, -10.8870,  -0.2783,  -0.3441,  -0.1590,  -0.1825,  -0.3488,
         -0.1349,   0.2749,   0.8094,   0.1227,  -0.0830,  -0.0649,   0.2118,
          0.4171,   0.2571,  -0.0901,   0.0973,   0.4427,   0.0403,   0.2199,
          0.3353,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9475, -8.4167, -0.5380,  0.1357,  0.7171,  0.0572,  0.1032,  0.0235,
         0.0601, -0.0357, -0.0700,  0.1244,  0.0352, -0.1947,  0.0654, -0.0992,
         0.1408,  0.0248,  0.2418,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0939e-02, -2.0665e+01,  2.9026e-01, -9.1064e-02,  9.0228e-02,
        -8.0366e-01,  1.3991e-02, -3.1108e-01,  1.0493e-01, -1.7275e-01,
         2.0005e-01,  2.5677e-02, -4.1183e-02,  8.6277e-02, -6.9042e-02,
        -7.6703e-02,  5.2877e-01,  1.4333e-02,  1.4399e-01, -1.7844e-01,
        -2.7278e-01, -3.4941e-01,  8.4421e-02, -9.1022e-02, -7.2033e-02,
        -1.8715e-01, -3.0529e-02, -8.7086e-02, -4.7281e-01, -2.1844e-01,
         8.5201e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.8176, -3.8491, -1.0498,  0.1642,  0.0601,  0.0650, -0.0922,  0.0220,
        -0.0179, -0.0285,  0.0361,  0.0517, -0.1577,  0.1274, -0.0783,  0.0179,
         0.1153, -0.1020,  0.1570, -0.0175, -0.0449, -0.0901,  0.0611, -0.0337,
         0.1357,  0.0314, -0.0321,  0.3382, -0.0098, -0.0865,  0.0739,  0.3065,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3693,  9.4832,  0.4304, -0.1370,  0.0992,  0.0801,  0.5927,  0.4990,
         0.7982,  0.1096,  0.0872,  0.1688, -0.2961, -0.5041, -0.0467, -0.0786,
         0.0213,  0.1435, -0.3097,  0.0647,  0.0418, -0.4109,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9642e-01,  9.3315e+00,  2.0217e-01, -2.2108e-01, -6.5095e-01,
         3.3549e-02, -7.3358e-01,  1.1115e-01, -5.4520e-02,  3.5864e-01,
         5.3315e-02, -9.0648e-02,  2.0115e-01, -6.2356e-02,  3.3824e-01,
         5.9178e-02, -1.9180e-02,  1.0428e-01,  6.6551e-02, -1.1368e-01,
         4.9092e-02,  3.4455e-03,  5.7659e-02,  7.5261e-02, -1.8187e-01,
         1.4048e-01, -5.1493e-02,  4.2675e-01,  4.4408e-03, -6.0204e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3653e+00,  1.4416e+01,  5.0280e-01, -5.4715e-01,  8.1523e-02,
        -1.4052e-01,  3.7117e-01,  7.7599e-02, -9.6808e-02, -3.2729e-01,
        -3.1395e-01, -1.6715e-01, -1.4144e-01,  1.0295e-01,  4.3603e-02,
         2.7900e-01,  1.6902e-03,  5.4019e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3718e-01, -9.6454e+00, -1.1877e-01, -5.2800e-02,  5.6232e-01,
         1.1741e-01, -1.1524e-01, -1.0367e-02,  1.8078e-01,  1.1642e-01,
         3.7223e-02,  4.2155e-02,  4.8646e-02,  1.5955e-01, -4.2418e-02,
         1.7138e-01,  4.3156e-02,  1.6386e-02,  8.3564e-02,  1.0593e-01,
         7.3892e-02,  2.8162e-02, -5.1392e-02, -2.9342e-02,  1.9256e-02,
         6.2215e-03,  2.4009e-01,  1.1877e-02,  2.3973e-02,  7.4440e-03,
         3.5062e-02, -4.6719e-02,  2.2863e-03,  1.5273e-02,  2.0247e-03,
         1.1465e-01,  8.8792e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8356e-01, -9.9538e+00,  9.5697e-01,  8.1688e-01, -8.9797e-02,
         8.8778e-02,  9.2197e-02,  4.5594e-03,  1.6846e-01, -1.2322e-01,
        -1.8913e-01,  9.8153e-01,  2.4148e-01, -4.5811e-01,  8.2418e-02,
         1.5053e-01,  4.2891e-02, -1.3563e-01,  5.9083e-02,  8.2663e-02,
         8.2859e-02, -1.2876e-01, -4.4424e-01, -4.6355e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9442e-01,  1.7378e+01, -3.4183e-01, -3.4582e-01,  3.3608e-01,
        -8.6532e-02, -2.9368e-01,  5.1421e-01,  3.8684e-01,  3.8028e-02,
        -6.3696e-02,  2.5936e-02, -1.7246e-01,  2.5386e-01, -2.1892e-01,
         2.9172e-01, -1.1893e-02,  3.0239e-02,  2.7597e-02, -7.5910e-03,
         1.7694e-02, -8.5652e-02, -1.8471e-02, -1.2132e-01,  5.3539e-01,
         8.0866e-01,  7.9261e-02,  2.2564e-01, -3.5618e-02,  2.0575e-01,
         5.5870e-01,  4.8488e-01,  1.5140e-01,  2.6264e-01,  2.2570e-01,
        -2.0380e-01, -7.5517e-02,  1.1253e-01, -3.4618e-02,  8.3571e-02,
         1.4328e-02,  1.3106e-01,  4.3589e-02,  4.0885e-01,  1.0396e-01,
         2.9564e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7009e-02,  7.4586e+00,  2.2198e-01,  9.1291e-03, -1.5700e-01,
        -3.2168e-02,  1.4591e-01, -7.7883e-03, -1.8112e-02, -1.1148e-01,
         4.5320e-02, -1.1692e-01, -3.8691e-04, -4.1705e-02,  4.3165e-02,
         6.2338e-03, -4.6180e-03, -2.9658e-01, -1.4532e-01,  1.1295e-01,
         2.3143e-01,  5.3403e-01,  1.3317e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8703e-01, -1.2735e+01,  2.8788e-01, -1.4826e-01, -2.0395e-01,
         1.0874e-02, -3.6549e-02,  9.8277e-01,  1.7674e-01,  1.5826e-01,
         4.7833e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.1570, -12.3773,  -0.4385,   0.3518,  -0.0167,  -0.4199,  -0.2465,
          0.3392,   0.0675,  -0.1967,  -0.1422,   0.0807,  -0.8041,   0.3367,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.1956, -13.6287,  -0.5229,   0.4736,   0.5833,   0.0816,   0.2280,
          0.5243,  -0.0547,   0.3902,  -0.4866,   0.0969,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4348e-02,  1.3584e+01,  1.1090e+00, -3.6895e-01, -5.1269e-02,
        -5.6405e-02, -3.4605e-01,  7.9504e-02, -6.8859e-03,  7.3118e-03,
        -1.6016e-02, -1.5932e-01,  9.3297e-03, -1.8070e-01, -1.0631e-01,
        -1.8950e-02,  1.2246e-02, -9.5438e-02, -6.2298e-02, -3.2621e-01,
         5.7784e-02, -5.4101e-02, -9.4981e-02,  2.5657e-02,  1.9114e-01,
         4.3262e-01,  5.0028e-01, -1.8579e-02, -7.3052e-03, -6.0394e-02,
         9.8262e-02, -3.8683e-02,  1.6347e-01,  1.6751e-01,  1.5076e-01,
        -1.9651e-01, -1.7409e-02,  7.2682e-03,  1.1687e-01,  2.3109e-02,
        -3.5864e-02,  3.1074e-02,  9.0686e-02,  6.2830e-02, -6.6549e-02,
         7.8198e-02, -1.4487e-03,  4.7496e-03,  4.8523e-02, -9.0451e-02,
        -5.7468e-03, -1.8665e-02, -5.5058e-02,  1.7467e-01,  2.7658e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.2324e+00, -1.1652e+01, -3.9307e-01, -4.0446e-02,  2.8725e-01,
        -4.8054e-02, -7.6083e-02,  1.2809e-01, -3.4453e-02, -2.7731e-04,
         1.6942e-01, -8.0095e-02, -1.1314e-01, -2.6997e-01, -1.0480e-01,
        -3.8060e-02,  3.2680e-01, -3.4484e-03,  1.7664e-02, -9.1732e-02,
         3.6858e-02, -2.4306e-01,  5.7762e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0558, 16.5402,  1.8338,  0.2405,  0.4768,  0.0337, -0.3508,  0.7095,
        -0.9264,  0.5438, -0.5463, -0.3323, -0.3147, -0.4730, -0.0311, -0.3049,
        -0.9630,  0.7922,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2705,  3.8283, -0.0857, -0.0624,  0.0487,  0.2869,  0.3090, -0.0063,
         0.0846,  0.0401,  0.1783, -0.0119,  0.0828, -0.0651,  0.0655,  0.1596,
         0.0650,  0.2390,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2915, 16.1977,  1.3805, -0.5523,  0.2764, -0.4813, -0.2247, -0.2388,
        -0.2530, -0.1543, -0.0296, -0.5004,  0.5894, -0.4007, -1.8821,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.2768, -17.2985,  -0.3269,   0.8372,   0.8228,  -1.0175,  -0.6008,
          0.2852,   0.6095,  -0.2995,  -0.3249,  -0.3034,  -0.3309,  -0.0436,
         -0.5306,   0.1450,   0.3140,   0.0286,   0.1046,   0.7663,   0.1277,
          0.6839,   0.2201,   0.3241,   0.1000,   0.2356,   0.2985,  -0.1061,
          0.9409,  -1.1394,  -1.4834,  -0.9733,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2374e-01, -5.1038e+00,  6.9433e-01,  4.1719e-01, -1.2549e-01,
         5.3297e-02, -1.3847e-01, -7.7693e-02, -1.9192e-01,  2.2403e-03,
         4.0100e-02, -1.5457e-01,  6.3195e-02, -5.2320e-02, -2.2145e-02,
        -1.2775e-02,  2.2850e-02,  1.4152e-01, -1.8635e-01, -3.3428e-02,
        -6.9388e-02,  1.1934e-01, -7.8285e-02,  1.6234e-01, -4.2502e-03,
        -1.5718e-01,  6.1092e-02, -8.4786e-02, -6.9636e-02,  7.2594e-02,
        -2.0738e-01, -3.6818e-01, -4.8408e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4263e-02, -9.4371e+00, -8.0454e-01, -5.6795e-01, -3.3556e-01,
        -1.7139e-01, -7.5948e-02,  7.2770e-02,  9.7180e-02,  5.4954e-02,
         4.2205e-02,  1.5697e-01,  1.6462e-01, -1.0323e-02,  2.7451e-01,
        -6.7708e-03, -1.3824e-01,  2.4082e-02,  4.4750e-02, -1.5681e-02,
         7.7389e-02,  1.6808e-01, -6.2459e-03,  1.8526e-01, -2.3943e-02,
        -1.7794e-02, -1.0796e-01, -2.4083e-02, -2.2026e-02,  8.4362e-02,
        -6.3209e-03,  3.6260e-02, -4.6144e-02, -1.7359e-02, -3.1966e-02,
         7.3834e-02, -9.2377e-03, -1.4125e-02, -1.1934e-02,  4.1129e-02,
        -3.0723e-02,  2.2615e-02,  4.5438e-02, -8.3110e-02,  1.7265e-02,
        -6.2413e-02,  6.9121e-02, -3.4620e-02, -3.0124e-02, -8.0650e-02,
         1.0073e-01, -4.0558e-02,  1.3854e-02, -2.0776e-02, -7.6718e-02,
         5.5121e-02, -9.6051e-02, -1.1155e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.9532, -18.0520,  -1.3535,  -0.6340,   0.0665,   0.3647,  -0.0486,
         -0.0491,   0.1476,  -0.0960,  -0.2145,   0.0722,   0.6703,   0.4683,
          0.0345,   0.2023,   0.0366,   0.1793,   0.1743,   0.1350,   0.0327,
          0.2548,   0.1287,   0.1069,   0.0935,   0.4929,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1386e+00, -1.7579e+01,  8.7377e-01, -1.9399e-01,  3.5069e-01,
         8.3816e-02,  1.5582e+00,  3.1184e-01,  1.2824e+00, -1.7396e-02,
         2.1409e-01, -9.9711e-02,  1.9309e-01,  5.3355e-02,  2.0478e-01,
         5.6393e-02, -7.7439e-02,  3.2401e-01, -2.0034e-01, -8.9847e-02,
         2.1103e-01,  1.5801e-02,  3.1564e-01, -9.0584e-01,  8.2695e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3278e-01,  1.6250e+01,  1.5688e-01, -3.4234e-01, -3.1337e-02,
        -4.8813e-01, -2.4665e-01, -3.4435e-01, -3.7979e-01, -1.2647e-01,
        -3.8798e-01, -6.4635e-01,  4.8539e-01,  2.5204e-01, -7.5888e-03,
        -1.3215e-01, -6.4561e-01,  3.1319e-02,  6.4373e-03, -2.7237e-01,
        -9.8455e-02,  2.2441e-01, -1.8728e-01, -4.4072e-01,  2.7271e-01,
        -5.9131e-02, -1.7142e-01, -4.7453e-02, -1.7741e-01, -8.4387e-02,
         7.8831e-02,  1.2189e-02,  3.2358e-02, -7.1070e-02,  6.6770e-03,
        -4.5964e-02, -2.2944e-01,  7.8678e-02,  7.7769e-03,  6.9155e-02,
        -1.7074e-02, -1.5537e-02, -1.1217e-01, -2.3255e-01, -3.9112e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7929,  6.3380,  0.5190,  0.0785, -0.0473,  0.2479, -0.0295,  0.1587,
         0.0441,  0.0881,  0.3093,  0.1081, -0.0238, -0.0573,  0.1045, -0.1384,
        -0.1208,  0.0481, -0.0249, -0.0409, -0.0841,  0.1143,  0.0394,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2695e+00,  6.6893e+00, -4.0716e-01, -2.3629e-01, -1.0033e-01,
         3.1855e-01,  5.2292e-01, -5.7245e-03, -2.4862e-01,  1.0232e-02,
         2.1529e-02, -5.2990e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 2.1040e-01, -1.7310e+01, -1.3034e+00,  4.5016e-01,  3.1725e-01,
         1.8058e-01, -1.1390e-01, -2.7851e-01, -3.4563e-01,  1.8458e-01,
        -5.0503e-01, -4.0595e-01, -8.5486e-02, -2.9810e-01,  1.4314e-01,
         9.8736e-03,  1.9764e-01, -9.7461e-02,  1.1569e-01,  3.3592e-02,
         2.8750e-01,  1.9587e-01,  5.5309e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2896e-01, -5.9684e+00,  9.4870e-02,  1.2041e-01, -1.4638e-01,
         8.0062e-02, -8.0046e-02,  8.0966e-02, -3.9137e-02, -7.7815e-02,
        -5.4425e-02, -1.4947e-02,  6.8520e-02, -8.4992e-02, -2.1278e-01,
         1.2714e-01,  5.3901e-02,  1.5251e-01, -8.3468e-02, -3.6027e-02,
        -3.5273e-02,  1.2366e-02,  2.7183e-01, -5.6814e-02,  1.8622e-02,
         8.0227e-02, -2.8638e-02,  2.1486e-02,  1.6938e-02,  2.9104e-03,
         4.0204e-02, -3.4610e-01,  5.6207e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5174e-01, -1.4214e+01,  1.2179e+00,  7.4090e-01,  6.9004e-01,
        -9.2672e-02,  5.2015e-01,  1.9009e-01,  1.7241e-01,  1.8597e-03,
         2.0560e-02, -1.9372e-01,  2.2802e-01, -1.7229e-02, -1.6957e-01,
         6.9202e-02,  3.4720e-02, -3.2759e-01,  1.4748e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0849, -9.2508,  1.0759, -0.2982, -0.2406, -0.4979, -0.0727, -0.0292,
        -0.1121, -0.1321, -0.0139, -0.3934, -0.2363, -0.1326,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4276e-01, -1.8118e+01,  8.4044e-01,  1.5685e-02, -8.5012e-01,
        -4.4176e-01, -1.1842e-01,  4.2640e-02, -5.2713e-01, -1.1993e-01,
        -1.9352e-01, -9.7672e-02, -6.0448e-02,  1.2697e+00,  1.8310e-01,
        -5.3763e-02,  5.6552e-01, -2.7331e-02,  2.7954e-01,  4.1915e-01,
        -2.2081e-01,  5.1872e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3623e-01,  1.8262e+01, -4.8749e-01,  3.8557e-01,  2.0342e-01,
         1.5177e-01,  9.3225e-03, -7.5886e-01,  1.1302e-03, -2.4945e-01,
        -4.4434e-01,  2.4295e-01,  3.2487e-02,  1.5593e-02,  1.4768e-02,
         9.8472e-02, -3.0579e-01,  4.1105e-01,  1.5998e-01,  3.0083e-01,
         4.0166e-02,  2.7506e-01, -5.4412e-02,  1.4248e-01, -2.1772e-01,
        -1.5242e-01,  3.1525e-02,  9.8773e-02,  4.5817e-01,  2.5829e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7444e-01,  1.0253e+01,  1.5487e-01, -6.7774e-01, -4.0948e-01,
         9.2451e-01,  8.7457e-01,  1.9298e-01,  1.2633e-02, -1.8251e-01,
        -4.8557e-01, -3.9671e-01, -3.4200e-01, -1.7501e-01, -8.5035e-03,
        -4.6427e-01, -1.5443e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.2877, -14.0016,  -0.7983,   1.2965,   0.4466,  -0.0655,   0.0230,
         -0.5376,   0.9412,  -0.2603,   0.5400,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9386e+00,  2.2550e+01,  3.0550e+00,  7.0914e-01,  1.3692e+00,
        -1.3305e+00, -6.8960e-01, -4.7219e-01, -5.6985e-01, -1.1712e-03,
         1.9424e-01,  5.6228e-01, -1.0912e+00, -1.4517e-01, -1.2611e+00,
        -2.1830e-01,  9.0096e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4533e-03,  1.1937e+01, -4.6073e-01,  1.4132e-01,  4.1794e-02,
        -2.7675e-01, -2.7537e-01,  1.5039e-01, -2.1018e-01, -1.6871e-01,
        -1.4148e-01,  2.3207e-01, -4.7842e-02,  3.8628e-02, -2.9772e-01,
         1.9996e-01, -3.5588e-02, -4.8169e-02, -1.2669e-01, -3.5667e-02,
        -7.4319e-02, -1.9882e-01,  2.3287e-01, -5.2902e-02,  3.0531e-03,
        -1.1483e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1522e+00,  1.1899e+01,  1.3417e+00, -3.0646e-01,  1.6198e-01,
         5.5912e-01, -4.1488e-01,  6.2347e-01,  4.6459e-01, -3.2596e-02,
        -2.0832e-01, -3.1604e-01, -4.1026e-02,  1.2651e-01,  6.7868e-03,
        -1.7921e-01, -1.9811e-02,  7.0839e-02, -2.2780e-02, -1.5349e-01,
        -1.7641e-01, -1.4148e-01, -8.1132e-02,  2.0405e-02,  1.0052e-01,
         1.2667e-02, -6.4806e-02,  1.6142e-01, -3.5307e-01,  2.1588e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7455, 18.2859,  0.3191, -0.8302, -0.1304, -0.4298,  0.9908, -0.1815,
         0.3327, -0.4909,  0.1961,  0.4092,  0.3366,  0.5269,  0.0947, -0.1341,
        -0.4963, -0.4004, -0.2101,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ -0.2812, -18.0212,  -0.6604,   0.6262,   0.8723,   0.2401,   0.0303,
          0.5045,  -0.2876,   0.0992,   0.1019,   0.1620,  -0.0692,  -0.0656,
          0.2516,   0.0588,  -0.3443,   0.8092,  -0.1949,  -0.1721,  -0.0333,
          0.3315,  -0.0614,   0.3410,   0.2539,   1.6196,  -0.4927,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5018, -8.5558, -0.0208,  0.4257,  0.3209, -0.2921, -0.2035, -0.0104,
         0.3356,  0.0597, -0.3279, -0.4070, -0.4552, -0.5116,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9046e+00,  1.3029e+01, -2.5380e-01,  8.4072e-03, -1.7818e-01,
        -3.2287e-01,  2.2856e-01,  7.9266e-02, -1.9234e-01, -3.9844e-02,
        -2.3686e-01, -3.3277e-01, -3.2283e-01,  7.4563e-02, -1.9030e-02,
         5.8806e-02,  7.2862e-02, -3.1277e-04,  3.0362e-01, -1.2725e-02,
        -2.7394e-01, -1.8731e-01, -8.9625e-02,  6.5784e-02,  3.1289e-01,
        -8.6710e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0547e-01, -8.8707e+00, -1.6644e-03, -1.5148e-01,  3.1777e-02,
         2.9952e-01, -6.9085e-02, -1.8552e-02,  2.5188e-01,  3.9416e-01,
         4.3906e-01,  4.2638e-01, -4.1377e-01, -3.7319e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2368e+00,  1.1923e+01, -1.8695e-02, -1.3077e-01,  3.2216e-01,
        -1.4729e-01,  7.9692e-02,  1.2415e-01,  2.8252e-01,  2.6172e-01,
         8.6387e-03,  2.9736e-01, -1.3713e-01,  3.0035e-01,  1.2353e-01,
         2.6227e-02,  1.2773e-01,  2.3223e-01, -5.4748e-02, -1.5048e-01,
         7.4884e-02,  1.9313e-02, -7.6866e-02,  8.3835e-02,  3.1758e-02,
        -2.8654e-02, -1.9912e-01,  4.0935e-02,  2.6520e-01,  9.9976e-02,
         5.3744e-01,  6.7944e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8337e-01, -1.0112e+01,  4.5601e-02,  8.3511e-01,  6.8999e-02,
         1.7339e-01,  8.5335e-02,  2.2162e-01, -2.3373e-01,  2.1888e-01,
        -8.3358e-03,  7.3428e-03,  3.2289e-03, -1.4915e-01, -6.3443e-02,
         6.5903e-02, -2.3370e-01, -8.7194e-02, -1.6818e-01, -5.6178e-02,
         2.2910e-02,  2.1697e-02,  1.2661e-01, -9.4899e-02,  2.8436e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.5868, -13.1121,  -0.6237,   1.3653,   0.8165,  -1.0869,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4985e-01,  1.4358e+01,  5.0363e-01,  4.9486e-02,  3.6812e-01,
         4.6903e-01,  1.8079e-01,  7.9633e-02,  3.3860e-02,  1.0220e-01,
        -3.5249e-02,  2.8087e-01,  6.9328e-02, -2.6096e-02,  1.1941e-01,
        -6.2098e-02, -1.7876e-01,  2.1094e-01, -1.2636e-01,  3.1362e-03,
        -1.6701e-01, -2.3922e-02,  2.9130e-01, -1.1979e-01,  2.9602e-02,
        -7.3073e-02,  4.8929e-02,  9.1022e-02, -4.3156e-02, -3.0908e-02,
         9.0285e-02, -1.8872e-02, -2.5228e-02,  3.4793e-02,  8.1001e-02,
        -7.3297e-01,  3.0955e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2312e+00, -1.3603e+01, -5.0829e-01, -1.1409e+00,  6.5278e-01,
        -8.2221e-02, -4.3836e-02,  2.5231e-01, -2.5807e-02,  4.1690e-02,
         1.1616e-01, -1.2541e-01, -9.2505e-02,  1.1723e-01, -4.6234e-02,
        -1.5931e-01,  3.7615e-01,  5.8918e-02, -1.1552e-02, -1.3316e-01,
         5.9780e-01,  4.9039e-01,  1.1009e+00,  6.4175e-02,  2.8701e-01,
        -1.5058e-01, -1.5041e-01, -2.5347e-01,  2.0861e-01,  9.8593e-02,
        -2.2316e-02,  1.1675e-02, -2.9240e-01, -9.6659e-02,  1.8096e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2124, 20.7744, -2.8542,  1.7272, -0.1376,  0.3540, -0.3186,  0.1123,
        -0.2296, -0.7919, -1.1301, -0.0914,  0.7768,  2.2862,  1.6886,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8656e-01, -1.5846e+01, -1.1915e+00,  1.2842e+00,  8.3914e-02,
        -4.6434e-01,  3.4216e-01, -5.5840e-01, -7.5522e-02, -1.3317e-01,
        -6.3186e-02,  2.9557e-01,  2.7322e-01,  4.3450e-02, -2.4533e-02,
         8.9920e-02,  9.7334e-02,  5.8171e-02,  2.1075e-01,  2.4671e-01,
        -6.6401e-03,  2.4973e-01,  8.7973e-02,  1.9344e-02, -9.5989e-02,
         2.2416e-01, -7.0433e-02,  2.3090e-02,  9.0657e-02,  1.2389e-01,
         2.4083e-02, -6.7875e-02, -1.9351e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1359e-02, -1.3317e+01,  2.2053e-01, -5.1617e-02, -1.8072e-01,
         3.9536e-02,  1.7727e-01,  7.0924e-02,  1.7419e-01,  1.0545e-01,
         8.9388e-02, -1.0736e-02,  1.6178e-01,  1.8138e-01,  2.3180e-01,
         2.2413e-02,  8.1813e-02,  1.2529e-02, -3.0776e-01, -4.1257e-02,
         9.1702e-02, -3.4155e-02, -2.8591e-02,  1.8300e-03, -3.2350e-02,
         1.1147e-01,  2.5812e-01, -3.3113e-01,  3.5550e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.9363,  7.4685, -0.4714, -0.0790, -0.1909,  0.0698,  0.5467, -0.0481,
        -0.1305, -0.1816, -0.0509,  0.0558,  0.0847,  0.0827, -0.3946,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4283e-01,  1.0676e+01,  1.3396e-01, -3.3392e-01, -1.9953e-01,
        -5.6150e-02,  2.0653e-01, -3.4920e-02, -1.2127e-01, -7.1946e-02,
        -1.9550e-01, -1.4653e-01,  4.7948e-02, -1.4851e-01, -7.6558e-02,
        -6.8492e-02,  7.0594e-03, -8.3042e-03, -1.4872e-03, -4.6221e-02,
         2.7736e-02, -2.5330e-02, -7.8292e-02, -5.8679e-02, -8.8901e-02,
        -1.4857e-02, -1.7831e-01,  6.0883e-03, -1.7829e-02, -4.6680e-02,
         2.2166e-02, -1.4238e-02,  2.4747e-02, -5.6608e-03, -6.0549e-02,
         5.0393e-02,  1.4877e-02,  4.1159e-02, -6.5230e-02, -4.7250e-02,
         2.7977e-02,  1.5296e-02,  1.7003e-01, -5.7831e-02,  5.3999e-02,
         3.3025e-03,  7.0569e-02,  3.4946e-02, -2.1750e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2965e-02, -1.2125e+01,  2.2025e+00,  9.7249e-01,  1.0136e-01,
        -5.3362e-01, -1.5587e-01, -4.2357e-01,  8.4934e-02, -2.0630e-01,
         9.3571e-03, -1.7722e-02, -1.0261e-01,  4.2902e-02,  5.7921e-02,
         1.4190e-01,  6.8739e-03, -9.7214e-02, -1.9996e-01,  5.7947e-02,
         1.3296e-02, -2.1826e-01,  1.8642e-01,  6.3694e-02,  6.8476e-02,
         3.3694e-01, -1.2906e-02,  8.7583e-02, -6.6939e-03,  1.9518e-01,
         7.9838e-02,  2.8909e-01,  1.8973e-01, -1.5101e+00, -1.9088e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.0599, -12.3496,  -1.0213,   0.0239,  -0.3578,   0.1727,  -0.0930,
         -0.1237,  -0.0433,  -0.2547,  -0.1981,  -0.0217,  -0.0597,  -0.1354,
         -0.3030,   0.2301,  -0.0731,   0.1060,   0.9065,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2210, 12.3294,  0.8695, -0.1820,  0.4371,  0.3533,  0.3477, -0.0156,
         0.0780, -0.1710,  0.3976, -0.1013,  1.0623, -0.2894, -1.1345,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5141e-01, -8.8338e+00,  4.9664e-02,  3.6990e-01,  2.4449e-01,
        -6.6312e-02,  1.2166e-01,  1.0499e-01,  1.3942e-01,  4.7787e-02,
         3.2549e-02, -2.3206e-02,  2.9398e-01,  1.2363e-01, -1.2934e-01,
        -4.1425e-02,  8.5216e-02, -8.1464e-02, -2.8910e-03, -1.0031e-01,
         4.9116e-01, -2.5042e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.1090, -13.4838,   0.3455,  -0.3543,  -0.1235,  -0.1275,   0.8149,
          0.4367,   0.7422,   0.0365,   0.3245,  -0.2266,   0.0208,   0.0966,
          0.1795,   0.1443,   0.2287,  -0.2124,   0.0276,   0.1426,   0.1435,
          0.1729,   0.3029,  -0.1628,   0.0431,   0.1291,  -0.0508,  -0.0328,
         -0.0172,  -0.2122,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4635, 20.9399, -2.4178, -0.1538,  0.7600, -0.2924,  0.2178, -0.4465,
         0.2749,  2.2402,  0.9617,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4160e-01,  1.9740e+01, -1.9990e+00, -2.0217e-01,  3.9767e-01,
        -6.6722e-01,  1.8303e-01, -1.7236e-01, -2.6498e-01,  4.1650e-02,
        -8.9743e-01,  2.9643e-01, -1.7983e-01, -1.2876e-01, -4.5272e-02,
        -1.3349e-01, -1.0428e-01, -4.5611e-01, -6.0884e-02, -2.5032e-02,
        -4.2838e-01, -1.5620e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6187e+00, -2.4916e+01, -2.1660e+00,  4.6212e-01, -4.4928e-02,
        -5.5950e-01, -4.8498e-01, -2.6198e-01,  1.6395e-01, -1.5420e-02,
         1.6732e+00,  2.6063e-01,  1.9317e-01, -1.2315e-01,  2.9939e-01,
         6.0255e-01,  1.3176e-01, -4.6644e-01,  2.5070e-03,  9.0572e-02,
         1.5101e-01,  4.9160e-02,  7.9711e-01,  1.7672e-01, -5.9771e-03,
         1.1866e-01, -1.3784e-03,  7.8184e-02,  1.0433e+00,  4.9734e-02,
         1.0452e-01, -8.0012e-01, -8.0859e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7843e-01, -1.0327e+01, -1.2218e+00,  1.9146e-01,  2.3777e-01,
        -1.1363e-02, -1.6693e-01,  2.2381e-01,  1.8883e-02, -3.1090e-02,
         6.4877e-02, -9.9396e-02,  6.8032e-02,  2.4814e-01, -2.4586e-03,
        -5.6479e-01,  7.3767e-02, -4.2151e-02,  1.0271e-01, -6.0865e-02,
        -1.1900e-01,  1.2073e-01, -1.0112e-01,  2.7849e-03, -1.7457e-01,
         9.8361e-02,  1.5934e-01,  3.0430e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6687e-01, -9.2854e+00, -4.3706e-01, -2.4010e-01, -2.6720e-03,
        -8.6067e-02,  1.8508e-02,  6.4371e-02,  1.2755e-01, -1.7325e-01,
         2.4761e-01,  4.6470e-02,  8.2683e-02,  5.3711e-02, -2.0276e-02,
        -2.8019e-02,  1.7452e-01,  8.2765e-02,  1.4857e-02,  1.6152e-01,
        -5.0769e-02, -3.6034e-02,  1.2379e-01, -4.0703e-01,  2.7218e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.3553e+00,  2.1314e+01,  1.1518e-01, -9.8032e-02, -1.4157e-01,
         1.2845e-01, -3.5860e-02,  5.4764e-02, -2.7090e-01, -2.6996e-01,
        -1.9375e-01, -8.0573e-01, -1.1275e-01, -4.3197e-01, -3.0473e-01,
        -5.7772e-01, -1.9832e-01,  1.8157e-01,  1.7538e-01,  1.9205e-01,
        -2.8323e-01, -3.6013e-01,  2.0392e-02, -1.6540e-01, -5.6005e-01,
         1.9569e-03, -3.3178e-01, -1.5816e-01, -3.3311e-01, -6.4487e-01,
         2.1590e-01, -2.2410e-01, -1.0845e-02,  9.1626e-03,  7.9174e-02,
        -1.6588e-01, -2.7892e-01, -1.5469e-01, -3.6193e-01, -2.9846e-01,
         8.7965e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4826e+00, -2.1768e+01,  1.9843e+00,  3.2490e-01, -1.4319e-01,
         1.2264e+00, -1.2397e-01, -6.0608e-01,  1.2373e-01, -3.0763e-04,
         2.0478e-01, -4.6561e-01,  4.7153e-01,  5.5037e-01, -2.5993e-01,
        -5.8044e-02,  4.2352e-01, -9.7043e-02,  1.2260e-02,  9.2193e-02,
         1.6145e-01,  9.6799e-01,  6.4418e-02,  7.5735e-01,  3.6204e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.9139, -12.9453,  -1.0879,   0.2247,   0.2820,   0.0508,   0.0736,
         -0.4022,   0.2247,   0.0907,  -0.2504,   0.0341,   0.0591,   0.0494,
          0.1323,   0.0133,   0.1114,  -0.1900,   0.3365,   0.4262,   0.1429,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1739e-01,  9.2069e+00, -3.1620e-01, -1.2892e-01, -2.1425e-01,
         9.0510e-02,  1.6962e-01, -9.4793e-02,  4.2604e-03,  1.1404e-01,
         2.1775e-02, -2.2620e-01,  1.8180e-01, -1.3891e-01, -1.5252e-01,
        -5.7532e-01, -3.5324e-01, -7.1694e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9885e-01, -1.1044e+01,  1.1698e-01,  1.8422e-01, -4.3194e-01,
         4.9188e-02,  8.3828e-02, -2.7834e-01,  9.4485e-02,  3.3617e-02,
        -2.3325e-02,  8.1825e-02, -4.4629e-03,  7.3121e-02, -3.5265e-02,
         3.2290e-01,  3.8593e-01, -4.4382e-04, -3.6069e-01, -4.2505e-01,
         4.8387e-02, -2.5240e-02,  2.8533e-01, -1.7874e-02, -5.8853e-02,
         1.8599e-02,  6.0628e-02,  9.0941e-02,  1.6604e-02,  1.3050e-01,
         1.7098e-02, -1.9833e-02,  1.7662e-01,  5.7850e-02,  2.9143e-01,
        -7.4154e-02,  1.5587e-01,  1.2550e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5036e+00,  1.9910e+01, -1.5003e+00, -2.1375e-01,  5.4847e-01,
         1.5613e-01,  7.3835e-02,  1.3031e-01,  1.9384e-01, -2.1716e-01,
        -1.3133e-01, -1.5509e-01,  2.7989e-01,  3.9744e-01,  5.2594e-02,
         1.6728e-01, -3.7819e-04, -5.0002e-01, -1.5385e-01, -9.6931e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2677e+00, -1.5245e+01,  1.4831e+00, -1.0776e-02, -2.8833e-01,
        -2.1641e-01, -4.9737e-02, -2.9967e-01, -4.7594e-01,  2.5156e-01,
         2.1713e-01, -4.7759e-02,  6.3495e-01, -2.8842e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0943e-01, -2.0784e+01, -4.0446e-01, -1.4964e-01, -8.1947e-01,
        -6.0200e-01, -2.8955e-01, -1.7124e-01, -7.4651e-02,  4.4845e-02,
         3.8112e-01, -6.2080e-02, -1.0075e-01,  3.4556e-01, -7.1835e-03,
        -3.6576e-01,  3.9535e-01, -7.8611e-01, -2.6741e-01, -2.2262e-01,
        -9.1183e-02, -1.2555e+00,  2.0097e-01, -7.9478e-01,  2.8318e-01,
        -4.6970e-02, -2.0313e-01, -2.2529e-01, -5.7535e-02, -1.0142e-03,
        -2.3077e-01, -1.2814e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.8004, -10.3123,  -0.7306,  -0.6386,  -0.4485,  -0.1400,  -0.1410,
          0.0798,  -0.0509,   0.2928,   0.0574,   0.1981,   0.0404,   0.1548,
          0.0886,  -0.0180,   0.0245,   0.0187,   0.3817,   1.0305,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7338e+00, -1.3490e+01, -9.5508e-02, -1.7413e-01, -2.6999e-02,
         4.3063e-01, -8.8065e-02, -1.3509e-01,  1.4579e-01, -2.0069e-01,
         4.7758e-02,  1.1419e-01,  1.3975e-02, -1.8795e-01, -1.8207e-02,
        -2.6769e-01,  1.2811e-01, -7.3968e-03, -8.6771e-02, -2.5245e-02,
        -1.0377e-01, -4.3572e-01,  1.4596e-02,  3.7418e-01, -2.9106e-02,
         2.4734e-01,  4.3043e-01,  5.7621e-01,  4.4457e-01,  2.1394e-02,
        -1.6064e-01,  7.8255e-02, -2.0678e-01, -7.7220e-03,  5.4613e-02,
         8.8806e-03,  2.0332e-02, -8.5053e-02,  1.4911e-02, -1.1718e-01,
        -2.8845e-01, -3.6282e-02, -8.3596e-02, -7.5288e-02, -1.2665e-01,
        -6.5055e-01,  5.9767e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4702, 21.3851, -0.0718, -0.6513, -0.3525,  0.2244,  0.4192, -0.0634,
         0.7940,  1.0801, -0.2167,  0.7377, -0.2780,  0.0231,  0.4482,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3690e-01, -1.6295e+01, -1.2266e+00,  7.5166e-01, -2.3385e-01,
         4.7002e-02,  1.6615e-01, -4.8114e-01,  2.8926e-01, -3.0180e-01,
        -6.3463e-02, -2.1901e-01, -1.4532e-01,  5.1272e-01,  5.3552e-01,
         1.8021e-01, -5.7050e-02,  2.2920e-01,  1.4332e-01, -6.7614e-02,
        -6.7422e-02,  1.6162e-01, -8.4699e-02, -5.4607e-02,  2.5735e-01,
         1.5867e-01, -2.5549e-01,  1.2062e-01, -4.4269e-02, -6.2699e-02,
         1.2864e-01,  6.2030e-02, -3.6545e-02,  9.9264e-03, -5.2465e-02,
        -2.6157e-01,  3.2562e-02,  2.4926e-02,  1.8121e-01, -7.0217e-02,
         5.1246e-02,  4.9076e-02, -5.1389e-02, -6.6346e-02, -1.0416e-01,
         9.2696e-02,  2.3999e-01, -1.0520e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-6.6397e-01, -1.4484e+01, -9.2551e-01, -1.2895e+00,  9.5940e-02,
         4.0311e-01,  1.2716e-01,  7.5806e-01,  8.7842e-01,  1.4906e-02,
        -1.3582e-01, -5.4751e-02,  5.8863e-02, -2.6516e-03,  1.2682e-01,
         6.4989e-02, -8.1337e-02,  2.6586e-01,  3.9320e-02, -1.3992e-01,
         7.2505e-04,  3.5228e-01,  3.1930e-01,  5.7095e-02,  3.5779e-01,
         2.9865e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4106e-01, -9.5010e+00, -5.1535e-01,  2.4754e-01, -2.8078e-02,
         1.3171e-01, -5.9270e-02,  1.2904e-02,  8.0430e-01, -1.3227e-02,
        -1.0978e-01,  5.3888e-03, -1.1391e-02, -9.5187e-02,  1.2078e-01,
        -3.9459e-02,  1.1777e-01,  1.1396e-01,  3.6876e-01,  1.4398e-01,
         2.1444e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9742e-01,  1.0982e+01,  1.8055e+00,  6.5416e-01, -5.0703e-01,
        -1.3468e-01,  6.0517e-02,  6.7514e-02,  2.4568e-02,  5.6169e-03,
        -8.1767e-02,  2.7226e-03, -1.5973e-02,  3.4866e-02, -3.1583e-02,
         1.3105e-01,  2.3766e-01, -2.5452e-02, -2.5536e-01,  4.8810e-02,
         1.3070e-02, -3.0715e-02, -8.1479e-02,  1.0078e-01,  3.3967e-01,
        -2.1453e-01, -2.2719e-02,  1.0060e-02,  3.1221e-02,  1.3512e-01,
        -1.3506e-01, -1.0829e-01,  2.8765e-01, -3.9345e-02,  1.8342e-01,
        -3.2978e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6013, 12.9157, -0.5788, -0.0151,  0.1725,  0.3290,  1.2222, -0.6034,
         0.0290, -0.0823,  0.3722, -0.0287, -0.1783,  0.1074, -0.0461,  0.0641,
        -0.0396, -0.2588,  0.1736, -1.4419,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2103e-01, -1.1542e+01, -1.2841e+00, -1.9854e-01, -4.4355e-01,
         8.3542e-03,  2.4128e-01,  1.7128e-01, -1.2982e-01, -5.0452e-01,
        -2.0369e-01, -1.8158e-01,  9.6313e-02, -3.1571e-02, -4.1890e-01,
         2.6467e-01,  7.7003e-02,  8.6475e-02,  3.7561e-01,  1.5982e-02,
         8.9651e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7117, 14.0386,  1.4443, -1.8681, -0.1163,  0.1685, -0.0527, -0.9866,
         0.3108, -0.0744, -0.0959,  0.2325, -0.6904,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.6500e-01,  2.2767e+01,  2.0730e-02, -4.1859e-01, -3.4990e-01,
        -4.2898e-01, -1.4617e+00, -1.7251e-01,  2.1008e-01,  1.0425e-01,
         1.2884e-01, -6.4576e-02, -1.7134e-01, -3.3101e-01,  3.8569e-01,
         2.1619e-01, -8.4759e-01,  2.9684e-01, -1.5157e-02, -6.4804e-02,
         4.0435e-01,  1.1865e-01, -1.8264e-01,  4.4687e-02,  1.1484e-01,
        -1.6853e-01,  8.8450e-02,  2.9167e-01, -1.0113e-01,  1.7832e-01,
         1.6716e-01, -1.4550e-01,  2.2645e-01, -2.6882e-02,  4.6584e-02,
        -4.8948e-01,  1.9848e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3481e-01,  1.3628e+01, -3.3097e-01,  8.2937e-02, -6.8952e-01,
        -4.1204e-01, -1.2281e-01, -4.4601e-01,  1.5228e-01, -2.5584e-01,
        -2.5962e-01, -1.3462e-03,  3.6445e-01,  4.0654e-01,  1.2205e-01,
        -1.8582e-01,  1.3000e-01,  1.1788e-01,  2.4023e-02, -9.0558e-02,
        -2.0733e-01,  2.0657e-01, -3.3438e-01,  2.3330e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6448, 19.7136, -0.4800,  0.6671, -0.1523,  0.5583, -0.5809,  1.0424,
        -0.3154, -1.1763, -0.0579, -0.7193,  0.9146, -0.2351, -0.1794, -0.5727,
        -0.0824, -0.4816, -0.2133,  0.1698, -0.0925, -1.1678, -0.0948, -0.1046,
        -0.5788, -0.3473, -0.2641, -0.3270,  0.0841,  0.1963,  0.0744, -0.4117,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2478e-01, -2.4138e+01, -2.5140e+00, -1.8869e-02,  3.0221e-01,
         9.3784e-01, -1.1501e-01,  9.1455e-02, -2.8450e-01,  2.9443e-01,
         2.4570e-01,  4.2361e-02,  1.7546e-01,  3.9741e-02,  1.6365e-01,
         1.9638e-01, -1.0013e-01, -1.4143e-01,  8.9328e-02,  2.5985e-01,
         7.2265e-01,  3.5901e-01,  1.9737e-01, -1.9349e-01, -7.8029e-02,
        -5.3837e-01,  9.3304e-01,  3.0715e-01,  2.2212e-01,  1.0929e-03,
         1.9247e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8131e-01,  1.4312e+01,  2.1823e+00,  1.3659e-01, -1.5866e-01,
         1.5403e-01, -2.2341e-01, -2.6723e-01, -2.7659e-01,  1.6468e-01,
         6.3564e-01,  6.4548e-02, -2.8688e-01, -4.9751e-01,  2.2077e-01,
         5.0146e-03, -1.9049e-02, -6.0229e-02,  1.3718e-02,  1.2760e-01,
        -1.4390e-03,  2.0482e-02, -1.5242e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1290e-01,  1.0133e+01, -3.5085e-01, -4.5665e-01,  7.3668e-02,
         1.2536e-02,  8.0263e-03,  5.9502e-02,  1.1010e-01, -1.2951e-01,
        -5.5875e-02, -3.0821e-01,  2.1219e-02, -2.6862e-01, -5.4683e-02,
        -1.0610e-02, -3.7852e-02, -2.4244e-02, -6.9735e-02, -9.5080e-02,
        -2.8673e-01, -9.0819e-02,  7.6740e-02, -1.1560e-01, -1.5138e-01,
         5.7421e-02,  1.4465e-01, -1.3812e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-2.6415e-01,  8.5489e+00, -6.6715e-02,  2.5817e-01,  9.0794e-02,
         3.8239e-02,  1.6917e-01,  2.1527e-01, -4.1896e-04, -1.6653e-01,
         2.2325e-02,  1.1979e-02, -2.4502e-02, -1.0750e-01,  1.7665e-02,
        -6.2024e-03, -6.2453e-02,  7.5126e-02, -3.3701e-02, -4.4694e-02,
         1.5584e-01, -4.1729e-02, -1.9621e-01, -7.4245e-03, -9.7633e-02,
         3.1746e-02,  1.4365e-01, -2.1002e-01, -2.6062e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8552e+00,  1.9720e+01, -4.1309e-01,  2.4107e-02,  4.0764e-02,
        -2.3498e-01,  1.2607e-01, -1.6483e-01,  2.7074e-01, -5.3783e-01,
        -5.3933e-01, -1.1699e-03, -4.8364e-01, -2.1592e-01, -2.2283e-01,
         2.3876e-01, -4.3979e-02, -1.3851e-01,  7.9217e-03, -1.1794e-01,
        -3.4202e-01, -2.3959e-01, -1.1062e+00, -1.5974e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.6507, -11.2435,   0.1506,  -0.2766,  -0.2520,  -0.3931,   0.0966,
         -0.1724,   0.1291,   0.1102,  -0.3835,  -0.7443,   0.3575,   0.1713,
          0.2739,   0.2437,   0.3787,   0.0491,   0.3897,  -0.0146,  -0.3081,
          0.5420,   0.1989,  -0.2162,   0.0197,  -0.0494,   0.4239,  -0.0211,
          0.2037,   0.0399,  -0.0559,  -0.3701,   0.7060,  -1.1162],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9693e-01, -1.5482e+01,  1.7274e+00,  2.3801e-01,  1.1062e-01,
         2.1417e-01, -6.0071e-01, -4.9014e-01, -3.0575e-01, -1.3798e-01,
         2.5648e-03, -6.7162e-02, -4.7945e-02,  5.7859e-02, -2.7601e-02,
        -2.2344e-01, -3.2695e-01, -1.4475e-01,  2.2711e-02, -1.6044e-01,
        -1.4603e-01,  2.1639e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1684, -7.5792,  0.0301,  0.9686,  0.1544,  0.1388,  0.0609,  0.1031,
        -0.1009, -0.1771, -0.4952, -0.2513,  0.1877, -0.0380,  0.0669,  0.0506,
        -0.1857,  0.1436,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5745e+00, -2.2509e+01, -1.2619e+00,  4.5633e-01,  2.4596e-01,
         2.2920e-01,  2.0493e-03, -5.3181e-01, -2.2054e-01, -4.2474e-01,
        -4.3803e-01,  3.5349e-01,  6.5395e-01,  3.5289e-01,  4.2448e-02,
         5.3829e-01,  1.2404e-01,  1.3343e-01, -9.1964e-02, -2.9417e-02,
        -2.0557e-02,  5.4931e-01,  8.1509e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.8636, -23.2158,  -1.0754,  -0.8354,   0.3965,   0.1102,   0.1995,
         -0.6987,   1.2900,   0.1325,   0.0696,  -0.3874,  -0.0284,  -0.4175,
          0.7678,  -0.4774,   0.4641,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9356e-01,  1.7069e+01,  1.3654e+00, -4.6486e-01, -3.9684e-01,
         4.6524e-01,  3.3565e-01,  6.4728e-01, -4.9393e-01, -1.4715e-01,
         4.4555e-01, -1.3744e-01, -2.1952e-02, -2.0337e-02, -2.7603e-03,
        -6.8288e-01,  2.7587e-01, -2.9491e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3637, 17.1245,  0.7650, -0.6089, -0.3519, -0.7066, -0.0974,  0.3698,
        -0.4432,  1.5262,  0.0561,  0.5002, -0.2814, -0.2102, -0.0601,  0.3929,
        -0.1640, -0.2880,  0.0527, -0.2217,  0.1814, -0.2340, -0.7241, -0.4803,
        -0.4985, -1.9411,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4867e-01, -1.5430e+01,  2.8642e+00,  4.4582e-01, -5.4947e-01,
        -4.0035e-01, -1.1191e-01,  9.8766e-02, -2.7333e-01, -1.4204e-01,
        -5.8738e-02,  3.4639e-01,  1.2841e-01,  4.1648e-02, -6.0214e-02,
         1.7106e-01,  1.5210e-01,  5.5736e-02, -3.0322e-02,  1.2357e-01,
        -4.8737e-02, -3.1632e-01,  1.0660e-01, -1.3154e-01,  1.0627e-01,
         7.7613e-02,  9.7636e-02,  8.0594e-03, -1.7387e-01, -3.0175e-02,
         1.4493e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5917e-02,  9.4239e+00,  5.6745e-01, -2.6849e-01, -6.4858e-02,
         3.6068e-02,  1.8437e-01,  1.2809e-02, -1.8033e-01,  3.2620e-01,
        -3.1883e-01, -4.6460e-01,  1.1249e-01,  1.5885e-01,  4.2148e-02,
         6.9133e-03,  2.9009e-01, -1.0386e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3544e-01,  9.6722e+00,  7.0150e-01, -1.1624e+00, -6.9250e-01,
         3.4372e-01, -1.7197e-01,  4.7540e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.7324e+00,  3.0104e+01,  1.3416e+00,  4.0932e-02,  1.4296e+00,
         6.9444e-02, -3.2780e-01, -1.4026e-01, -3.6195e-01,  6.8041e-01,
        -9.3507e-02, -2.8872e-01,  3.1183e-01,  2.5825e-02,  3.1938e-01,
         3.6197e-01,  3.3678e-01, -1.5869e-01, -8.7097e-02, -3.9520e-02,
        -4.1797e-01, -6.1548e-01, -4.6332e-01, -1.6360e-01, -3.9134e-01,
         3.2467e-01, -2.4106e-01,  1.8467e-01,  6.9326e-02,  5.1978e-03,
         2.4274e-01,  2.7246e-01,  8.9713e-02,  1.4914e-01, -1.3064e-01,
         2.4996e-01, -2.0522e-02, -2.9327e-02,  1.4318e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7046e-01,  9.3740e+00,  2.1969e-02, -2.7618e-01,  6.9688e-02,
        -6.7464e-02,  3.0275e-01,  1.4002e-01,  2.9724e-01, -2.5040e-01,
         7.6333e-02,  1.7920e-01,  3.5487e-01,  4.2263e-02,  1.8420e-02,
         9.6269e-02, -6.4557e-02, -9.8020e-02,  4.4222e-02, -5.6688e-02,
         1.0122e-02, -1.8088e-01, -1.1095e-03,  9.4726e-03, -5.9046e-02,
        -6.0567e-02, -4.1181e-02, -9.1968e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.9159, -11.1759,  -0.9826,  -0.4266,  -0.4440,   0.2825,  -0.5633,
          0.5810,   0.0454,  -0.3576,  -0.1174,  -0.5651,   1.2172,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6749e-01,  1.1505e+01, -8.4856e-01, -3.9521e-01,  2.0465e-01,
        -1.6221e-01, -3.0548e-01,  2.9303e-01,  1.4041e-02, -1.7317e-01,
         6.3895e-03, -3.0436e-01, -4.6705e-03,  2.4380e-01, -3.0972e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5658e-01, -1.0882e+01, -7.0162e-01,  7.5431e-01, -6.3592e-01,
         5.6456e-02,  1.9698e-01,  1.2925e-01, -1.0441e-01,  1.3678e-02,
         7.9219e-02,  5.8924e-02,  1.4752e-02, -7.4919e-02, -8.8318e-02,
        -7.3228e-03,  5.3943e-02, -3.8239e-01,  7.7260e-02,  1.1086e-01,
         1.5207e-01,  7.8582e-03,  8.4495e-03, -4.7368e-02,  1.4936e-02,
         2.8004e-01, -1.5088e-02, -1.5629e-01,  1.1440e-02, -9.3660e-02,
        -2.0516e-01,  5.4425e-02, -1.7873e-02,  1.2307e-01, -1.6017e-01,
         1.1534e-02, -3.0641e-02, -9.8214e-02,  5.4072e-02,  6.6606e-03,
         8.6635e-02,  6.0108e-02, -2.6169e-02,  3.0804e-02, -6.7844e-02,
         2.6469e-01,  1.3890e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6687e-01,  2.4264e+01,  8.4738e-01, -1.3939e-01,  3.8564e-01,
        -8.2068e-02, -3.1662e-01,  1.3700e-01, -1.1343e-02, -3.4134e-01,
        -1.3647e-01, -1.0067e-01,  9.0399e-03,  4.6283e-02, -5.5739e-02,
         9.2052e-02, -2.0699e-01, -2.1374e-01,  1.2592e-01, -3.2385e-02,
        -8.5358e-02, -8.0617e-03,  1.3670e-01,  1.8400e-01, -1.1697e-01,
         3.5549e-02,  1.0850e+00,  2.3394e-01,  1.8449e-02,  2.2282e-01,
         3.0047e-01,  1.5066e-02, -4.5931e-02,  3.1332e-02, -1.3454e-02,
         3.7645e-03, -6.5492e-02,  9.5968e-03, -1.3976e-01, -2.2541e-01,
         5.0395e-01, -7.5682e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3306e+00, -1.6632e+01, -7.9550e-01,  1.0454e-01,  6.1867e-02,
        -4.1306e-01, -1.8852e-01,  3.0793e-03,  2.4606e-02, -7.7562e-01,
        -4.3511e-01,  5.5434e-01, -3.1401e-01, -1.1110e-02, -2.5994e-01,
         5.6629e-01, -9.2927e-02, -3.1668e-01,  2.7228e-01, -1.6514e-01,
         2.4756e-01,  8.1993e-02,  2.7074e-01, -2.0497e-01, -1.1476e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5660e-01,  1.2202e+01, -3.7945e-01, -1.4801e-01, -1.7397e-01,
         1.4038e-01,  9.4366e-02, -7.6391e-02,  6.7328e-02, -1.9069e-03,
        -5.3760e-02, -1.8292e-02, -4.6381e-04,  5.0248e-02,  2.6998e-01,
         1.1477e-01,  4.3412e-02, -3.3062e-02,  2.4592e-02, -4.8575e-01,
        -9.5979e-02,  4.0110e-02,  2.1430e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6500e-02, -2.6783e+01, -1.2215e+00,  8.4290e-01,  4.6057e-01,
        -3.5088e-01,  4.0510e-01,  9.8673e-02,  6.0413e-01, -4.3885e-01,
        -3.6858e-01,  5.1795e-01,  2.9992e-01,  4.4936e-01,  1.8873e-01,
         3.4441e-02, -1.3151e-02, -2.7581e-01, -1.6301e-01,  1.6204e-01,
        -1.5841e-01,  2.4290e-01,  1.8721e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2174e+00,  2.2781e+01, -1.6107e-01, -1.0088e+00,  1.5544e-01,
        -6.8496e-03,  4.6870e-02, -9.3299e-01, -7.8180e-02, -4.2098e-01,
        -3.0023e-01,  1.7579e-01, -1.3914e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8337, 12.9780,  0.1390,  0.1152, -0.0785,  0.0432, -0.1676,  0.0539,
        -0.0179,  0.1861,  0.1585,  0.7007,  0.3114, -0.0242,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9666e-02,  7.5026e+00, -2.5072e-01, -1.2275e-01,  1.9883e-03,
        -2.3044e-02,  8.1626e-02, -4.7659e-02,  1.2293e-01,  5.8429e-02,
         7.5238e-02,  8.5366e-02,  7.0246e-02,  5.6059e-02,  1.3191e-01,
         2.1343e-02, -1.6754e-01, -7.1500e-02, -1.7419e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-6.0162e-01, -1.4610e+01,  7.9698e-01,  1.2311e-01,  3.9904e-03,
        -4.7664e-01, -1.6112e-01,  1.2599e-01,  9.8108e-02, -7.9012e-02,
        -8.9377e-02,  1.5840e-01,  1.7147e-02, -1.8344e-01,  1.6603e-01,
         3.8524e-02,  5.0617e-03,  3.4962e-03,  4.3410e-02, -2.2008e-01,
         2.6368e-01, -1.2039e-01, -8.2808e-02,  7.5575e-02, -1.4209e-01,
        -1.3123e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.3531, -14.2588,   0.6103,  -0.4977,  -0.6117,   0.2307,  -0.3281,
          0.1662,  -0.3386,   0.3762,   0.0202,  -0.0276,  -0.1582,  -0.3782,
         -0.0754,  -0.0650,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8164, 10.5439,  0.2477,  1.4436, -0.1765,  0.0944,  0.0537,  0.3171,
        -0.3065,  0.1936, -0.1368, -0.7288, -0.1657,  0.0633, -0.1715, -0.3605,
         0.3543,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3863e+00,  2.1537e+01,  5.8960e-01,  1.1052e+00,  4.9602e-01,
         2.0199e-01,  1.5927e-01,  1.9752e-02, -2.8300e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3630e-01,  2.1300e+00,  1.5085e-03, -1.2854e-01, -4.8107e-01,
        -2.9911e-02, -2.3732e-02,  1.2491e-01, -3.2229e-02,  2.1627e-02,
         1.2304e-01,  1.4988e-02,  2.3340e-02,  4.6902e-02,  1.5293e-02,
         3.2633e-02,  1.8374e-02,  1.0519e-02, -4.8473e-03,  7.2563e-02,
         4.6325e-02,  2.0213e-02,  2.1016e-02, -9.5685e-03, -3.6789e-04,
         1.1139e-02,  1.4185e-02,  1.8700e-02,  9.6739e-02,  3.1747e-02,
         1.6873e-02,  7.7907e-02, -1.9020e-02,  1.9312e-02,  2.9635e-02,
         1.0661e-02, -2.1267e-03, -3.1587e-02, -3.3265e-02,  4.6677e-03,
        -1.2452e-03, -2.0350e-02, -1.2321e-02, -4.4837e-02,  1.9467e-03,
         9.8799e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7270e-01, -1.8115e+01,  7.0531e-01,  8.2503e-01, -4.0246e-01,
         6.3481e-02, -3.8888e-01,  3.0296e-03, -3.2775e-01,  3.3341e-01,
        -2.8454e-01, -2.0673e-02, -5.3821e-01, -1.3833e-01, -1.6422e-01,
        -3.4594e-02, -2.7213e-01, -1.7093e-01,  8.1176e-02, -2.3478e-01,
        -2.3017e-01, -2.5990e-01, -4.3480e-01, -3.4172e-02,  3.8664e-02,
        -1.9924e-01, -3.8495e-01, -7.8338e-02, -2.0467e-01,  6.4025e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6443e+00,  3.1223e+01, -2.6704e+00, -8.6069e-05, -1.8881e+00,
         3.7856e+00,  9.7997e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9875, 12.4293,  0.3138,  0.1326,  0.2524, -0.0686,  0.2293, -0.0125,
        -0.4546,  0.3986,  0.3214,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.9552, -26.0057,  -2.4993,   1.9206,   0.4865,   0.9606,   0.2321,
          0.6536,   0.1984,   0.4600,   0.3806,  -0.5072,  -0.8267,   1.0380,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2233e+00, -2.0758e+01, -4.5027e-01, -9.6141e-02, -8.5596e-01,
        -1.1087e-02, -8.3265e-03,  1.0668e-02, -2.6827e-01,  4.2790e-01,
         7.8797e-02, -1.1422e-01, -6.1029e-02, -1.1457e-01, -1.5014e-01,
         4.0912e-02,  1.8386e-02,  1.8632e-01,  6.4580e-02, -3.2548e-01,
         1.1243e-01, -1.3642e-01, -8.1901e-02, -2.4002e-01, -3.8952e-05,
        -1.5629e-01,  2.3451e-01, -2.2320e-01,  1.8888e-01, -1.5156e-01,
        -3.5076e-01, -7.1833e-02, -3.5769e-01, -6.3071e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5654,  9.6420, -0.4408,  0.1238,  0.5771, -0.2986, -0.2162, -0.0351,
         0.2290, -0.0679, -0.2196, -0.3501, -0.0516,  0.5444,  0.6114,  0.1440,
         0.1105, -0.3714, -0.0315, -0.1238,  0.0603,  0.0356, -0.3723,  0.1636,
         0.3413,  0.2203,  0.2169, -0.1571,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1905e-01, -1.8744e+01, -9.0781e-01,  9.1213e-01, -1.3090e-01,
         7.1657e-01, -1.9367e-01, -1.8471e-01, -2.0882e-01,  2.6726e-02,
        -2.4944e-01,  8.9721e-02, -4.3659e-01, -7.8732e-01,  3.1248e-01,
        -1.8219e-02,  8.4404e-02, -4.2387e-01,  2.5031e-02, -1.7138e-02,
        -1.7823e-01, -1.3449e-01, -9.1618e-01,  6.2878e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.0860e+00,  9.8115e+00,  2.7162e-01,  7.1625e-01,  1.7051e-01,
        -7.3321e-03,  1.1676e-02, -1.9370e-01, -8.4529e-02,  2.1632e-02,
        -2.0235e-01, -5.9291e-02,  4.8345e-02,  2.0723e-01,  1.7210e-02,
         2.5641e-02,  1.0397e-02, -2.7453e-01,  8.5754e-02,  4.8019e-03,
        -1.4573e-01, -9.5184e-02, -6.1675e-02, -6.4140e-02,  3.3454e-02,
        -5.8146e-02, -9.9117e-02,  1.6402e-01,  1.0924e-02, -3.0559e-01,
        -5.0442e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0602, -8.0055,  0.4179, -0.0788, -0.3744, -0.3306, -0.3977, -0.3522,
        -0.1995, -0.1115, -0.1603,  0.8132,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.6268, -23.0604,   0.6521,  -0.9292,  -0.8645,   0.1418,   0.3020,
         -0.0665,   0.2339,  -0.0554,   0.3149,   0.2950,  -0.6602,   0.3707,
         -0.1983,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6095e-02,  6.8125e+00, -8.1473e-02, -2.5873e-01, -1.4580e-01,
        -1.8990e-02,  5.3134e-02,  9.5058e-02,  6.2223e-02, -1.0229e-02,
        -1.8776e-01, -3.6207e-02, -7.4382e-02,  1.9350e-02, -4.0682e-02,
         1.3000e-01, -7.3249e-02, -6.0063e-02,  3.6008e-02, -8.1690e-02,
         2.6346e-02, -6.6989e-02, -3.1828e-02,  2.6685e-02, -5.2085e-02,
        -1.3064e-02,  4.3369e-02, -5.5552e-03, -5.6514e-02,  8.5498e-02,
         4.2758e-02, -2.8092e-02,  9.3741e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5880e-01, -2.1936e+01, -1.1550e+00,  8.1744e-01, -1.7649e-01,
         9.8131e-01, -4.8396e-03,  4.9378e-01, -2.0191e-01, -2.5026e-01,
        -1.0915e-02, -2.5946e-01, -1.5852e-01,  1.4467e+00,  5.2028e-01,
         8.8926e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3121, 18.0110,  2.8106,  0.1303,  0.5839,  0.3410,  0.8925,  1.5795,
         0.4279,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6206, 21.2605,  0.3991,  2.8198, -3.2579,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5164e-01, -2.2783e+01,  5.3530e-01, -5.1019e-01,  3.2480e-02,
         6.8101e-01,  4.2780e-02, -8.0736e-02,  7.7280e-01, -3.3481e-01,
         1.9318e-01,  3.8280e-01,  8.3752e-01, -2.2725e-01,  2.9083e-02,
         6.4801e-02,  3.0338e-02, -1.3673e-02, -3.5000e-01, -1.4732e-01,
        -1.3210e-01, -2.6563e-01, -1.3924e-01,  3.9868e-01,  4.4761e-02,
        -3.1963e-01,  2.5228e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1736, 18.8579,  0.9499,  0.0540,  0.6079,  1.5694,  0.6598,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2256e+00, -3.1034e+01, -1.3144e+00, -2.2982e-01,  2.4352e-02,
         1.5238e-01, -1.0706e+00,  4.2750e-03,  7.6964e-01,  1.6400e+00,
         2.9481e-01, -1.2324e+00, -4.5986e-01,  1.1128e-01,  2.3331e-01,
        -5.9588e-01,  3.5180e-02, -6.5445e-01,  5.2775e-01, -8.6204e-01,
         1.2708e+00, -1.5966e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.3721, -11.0757,   0.1306,  -0.2036,   0.9234,   0.0731,   0.3582,
         -0.7074,  -0.1448,   0.0241,  -0.2740,   0.2325,   0.1027,  -0.0938,
         -0.1090,  -0.1836,  -0.2047,  -0.4794,  -0.3710,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0794e-01,  1.5612e+01,  6.3987e-01, -2.4439e-01,  2.2134e-01,
         5.8755e-01,  1.2109e-01,  1.0273e-01, -2.3176e-02, -2.5283e-01,
        -1.3143e-01,  1.0071e-03, -6.5521e-02, -5.3846e-02,  1.1198e-01,
         4.7934e-02, -1.3623e-02,  6.5796e-02, -1.1393e-01,  1.4023e-01,
         1.2155e-01, -2.3717e-01,  1.1818e-01,  1.1155e-01,  4.3465e-03,
         1.8994e-01, -6.6627e-03,  1.1191e-01, -2.2527e-02, -1.5030e-02,
         1.8166e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.9538e+00, -2.1432e+01, -1.2524e+00,  2.3314e+00,  8.2340e-01,
         7.4748e-01,  1.3918e-01, -3.5834e-02, -3.7773e-02,  2.6987e-01,
         9.0609e-02, -2.6971e-01, -4.4170e-01,  2.6796e-01, -2.5718e-01,
         1.2611e-02,  1.7798e-01, -6.9591e-01,  1.1496e-01, -3.4202e-01,
         2.0284e-01,  2.8883e-01,  4.2925e-01, -2.7699e-01, -2.7483e-01,
         1.6558e-01, -1.1608e-01,  3.7557e-01,  1.6662e-01,  7.6506e-02,
        -3.4850e-01,  1.3717e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.1466, -20.3000,  -1.7642,   0.1511,  -0.0899,  -0.2008,  -0.6734,
         -0.3748,  -0.5627,   0.2608,   1.2378,  -0.0808,   0.3348,  -0.0451,
          0.1140,  -0.0961,  -0.4424,  -0.3181,   0.0847,   0.0519,  -0.4947,
         -0.3854,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1566e-01,  8.4054e+00,  3.9495e-01, -2.1568e-01, -2.7858e-02,
         4.4149e-01, -4.4137e-01,  3.0543e-01,  1.4278e-01,  9.0886e-02,
        -2.2759e-01, -7.4827e-02, -6.0240e-02, -6.9217e-02,  7.8251e-02,
         1.9287e-02, -3.0579e-02,  1.0660e-01,  8.8493e-02,  7.2074e-02,
         5.8682e-02, -2.0080e-01,  1.2607e-01, -5.2630e-02, -4.9912e-02,
         8.5686e-02, -3.8650e-03,  1.8033e-01, -3.0401e-02, -2.7731e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4299,  3.8939, -0.0643,  0.1643, -0.0957, -0.0076, -0.0523, -0.0290,
         0.0623, -0.0578,  0.0048, -0.0281,  0.0273,  0.0590,  0.0103, -0.0345,
        -0.0274,  0.1836,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7450e-01,  2.1878e+01,  1.2349e+00,  5.0094e-01, -8.6256e-02,
        -1.2550e+00,  2.7163e-02,  1.1109e-03, -2.8337e-01, -2.2876e-02,
         7.6205e-02, -1.7104e-01, -2.5765e-01,  1.5273e-01, -4.3801e-02,
         4.4973e-02,  1.2685e-01, -1.2030e-01, -1.4340e-01, -2.3587e-01,
        -1.7237e-01,  1.9621e-03, -5.1137e-01,  2.2112e-02,  1.1327e-01,
        -1.6276e-02,  3.1013e-01, -1.7672e-01,  6.2838e-02, -5.4866e-02,
        -6.3802e-02, -1.7360e-01,  2.9708e-01,  1.6875e-01, -9.9861e-02,
        -1.4933e+00,  5.2762e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.3631, -19.5438,   1.1588,   1.3443,   0.1745,  -0.0399,  -0.3480,
          0.5737,   0.0415,  -0.3060,  -0.1413,   0.5544,   0.1134,  -0.2609,
         -0.2973,  -0.2172,  -0.1515,  -0.2428,  -0.1960,  -0.5448,   0.3243,
         -0.5022,  -0.8110,  -1.1834,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0425e-01,  1.3929e+01, -7.5301e-01,  2.5873e-01,  3.3129e-01,
        -2.7622e-01, -5.2548e-01,  3.6042e-02,  2.4851e-01, -2.8790e-01,
        -1.0123e-01, -2.9589e-01, -3.9402e-01,  5.7300e-02,  1.2164e-03,
         5.4746e-02,  1.3176e-01, -1.0386e-01,  1.1397e-01, -5.7331e-02,
         1.2502e-02, -7.2240e-02,  5.8230e-02,  1.0240e-01,  3.4181e-01,
         4.8955e-01,  6.0726e-02,  5.9678e-02,  1.7935e-02, -1.7680e-01,
         1.5940e-01,  1.0003e-02,  3.5846e-02, -3.0229e-02,  2.7440e-01,
        -1.2589e-01, -9.7813e-02, -1.8238e-03,  6.3103e-02,  9.2048e-02,
        -1.4591e-01,  7.2211e-02,  4.8023e-02,  5.1923e-01, -5.4136e-01,
        -5.3488e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6502,  8.1657,  1.1013, -0.2099, -0.8713,  0.1853,  0.0654, -0.0700,
         0.0573,  0.2182,  0.2121,  0.0473,  0.4137, -0.0237,  0.0905, -0.0287,
        -0.0173, -0.2596, -0.0889,  0.0799,  0.1564, -0.2001, -0.1069,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.4580, -16.0256,   1.7530,  -0.6966,   0.1110,   0.6039,   0.1124,
         -0.1014,   0.5240,   0.8899,  -0.2590,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4304e+00,  1.3035e+01,  1.7776e-01, -5.4187e-01, -1.2489e-01,
        -3.2883e-02,  1.6821e-01, -9.4084e-02, -1.6082e-01, -1.2015e-01,
         2.1339e-02,  6.4137e-03,  7.5469e-01, -4.1787e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.0661, -14.3173,  -0.4535,   0.6192,   0.4317,   0.0428,   0.4073,
         -0.1105,   1.0632,  -0.3439,  -0.6934,  -0.3609,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3271e-01,  1.0403e+01,  2.5618e-01, -2.0000e-01, -2.2034e-01,
        -7.4404e-03, -1.3772e-01,  1.6577e-01, -6.8247e-02,  2.0846e-01,
        -3.0747e-02,  9.3665e-02, -1.2358e-01, -2.1500e-02,  2.2373e-02,
         3.5251e-02, -4.9337e-02, -9.1257e-02, -7.2178e-02, -2.0558e-01,
        -6.4759e-02,  2.9809e-02,  9.8662e-02, -5.9935e-02,  1.5351e-01,
        -2.5667e-01, -9.0807e-02, -2.9075e-02, -1.0167e-01,  2.7580e-02,
        -1.1831e-01, -4.3594e-03, -5.4801e-02,  1.6550e-01,  6.1682e-02,
         5.8905e-02, -9.0877e-03, -5.7355e-02, -1.6339e-02, -5.6837e-02,
        -3.2621e-02, -5.3768e-04, -3.5109e-02,  2.7683e-02, -1.9752e-02,
         7.6521e-02,  1.2737e-02, -2.1078e-02, -2.2850e-02, -7.8459e-02,
         1.0440e-02,  6.4001e-02, -4.5363e-02,  1.9387e-02,  2.9417e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([  1.3982, -15.8842,   0.7346,  -0.1083,   3.0212,   1.0592,  -0.9022,
         -0.7133,   0.3776,   0.5199,  -0.4187,  -0.2410,  -0.2781,   0.3006,
         -0.9719,   0.3862,   0.0466,  -0.3230,   0.0453,   0.1585,  -0.2903,
         -0.5101,  -1.2828,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0029e-01,  1.4531e+01, -1.2901e+00,  8.5788e-02, -1.1903e-02,
        -2.5071e-01, -4.6792e-01,  3.8893e-01, -2.8532e-02,  5.5085e-01,
        -2.5666e-01, -2.0477e-01, -3.7740e-01, -2.6572e-01, -3.1214e-01,
         1.9009e-01, -1.0484e+00, -1.2545e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7273,  9.4115, -0.2495, -1.6018, -0.5758, -0.5462, -0.1880, -0.6119,
        -0.1221, -0.3063,  0.2079,  0.0451,  0.6705, -0.2959,  0.1139,  0.2906,
        -0.5781,  0.2792,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.8200, -19.9167,  -0.2962,   0.1234,  -0.4119,   0.4079,   0.2815,
          0.9970,   0.5669,  -0.5056,   0.7255,  -0.4419,   0.0926,   0.0382,
          2.9736,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5120e-01,  1.6828e+01, -7.1917e-01,  1.1543e+00, -4.8715e-01,
         1.6893e-01,  2.2011e-01,  3.3625e-01,  6.8892e-01,  1.1726e-01,
         2.6550e-01, -2.2535e-01, -1.8473e-02, -1.0441e-01,  2.0633e-02,
         7.2066e-02, -3.2579e-02, -4.8456e-01, -2.3921e-01, -2.0965e-01,
         3.0470e-01, -3.3922e-01, -8.0913e-02,  3.5201e-02,  9.9031e-02,
         5.4063e-01,  2.8174e-01, -5.3799e-02,  1.1024e-01,  8.1454e-03,
         1.1172e+00, -7.5259e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2700e-01, -1.0134e+01, -9.4603e-01, -1.5209e-01, -2.0733e-01,
        -4.9205e-02,  1.1924e-01,  1.5594e-01,  1.8892e-01, -5.8567e-02,
         2.7225e-01,  8.5519e-02,  1.0246e-02,  1.7174e-02, -3.4037e-02,
         3.5359e-01,  5.8299e-02,  2.0622e-01, -8.3522e-02,  9.0271e-02,
         3.3097e-01,  1.4518e-01,  2.2404e-01,  3.6781e-02,  1.3039e-01,
         8.3384e-03, -4.1389e-03, -2.5168e-01, -2.5319e-02, -8.0910e-02,
         1.0511e-01,  5.5455e-02, -4.5931e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5237e-01,  1.8669e+01,  9.0028e-01,  5.4144e-01,  6.4455e-01,
        -3.1524e-01, -4.8262e-03, -2.0418e-01,  3.0689e-01,  2.4150e-01,
        -4.1388e-02, -2.9552e-01, -4.5390e-01, -1.2834e-01, -7.4005e-02,
         3.3839e-01, -2.0462e-01, -1.5363e-01, -1.3515e-01, -2.1465e-01,
        -2.5557e-01, -1.4174e-01, -1.7754e-01, -8.8369e-02, -7.6171e-02,
        -3.0418e-01,  1.5668e-01,  2.5112e-01,  1.3927e-01,  8.8021e-02,
        -9.3410e-02,  1.5475e-01, -2.5609e-02, -1.0574e-01,  2.7160e-02,
        -6.3655e-02,  2.0746e-02, -3.1438e-03, -1.2358e-02,  2.4303e-02,
        -4.5594e-02, -1.4652e-02, -8.6990e-02, -6.4502e-02, -1.1172e-01,
         8.8668e-02,  5.0889e-02, -1.6687e-01,  1.0002e-01,  4.9106e-02,
         1.4153e-01,  3.0421e-02,  3.5917e-02,  1.1181e-03,  7.5973e-02,
         2.2730e-02,  3.1982e-01, -2.3418e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3894e-01, -1.5389e+01, -1.4355e+00, -4.7150e-03, -9.9317e-02,
         1.2296e-01, -1.1754e-02,  4.5018e-01,  1.5179e-01,  2.0271e-01,
         2.0322e-01,  1.7454e-01,  7.6719e-02, -5.9312e-01,  1.2875e-01,
         2.1596e-01,  5.1363e-02,  1.0661e-01,  1.1632e-01,  7.8838e-02,
         1.6525e-02,  2.7510e-02,  1.3756e-01,  6.2281e-02, -3.4499e-01,
         1.1335e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9307, 23.4663, -0.0816, -0.6335, -0.1854,  0.4436, -0.2177,  0.1649,
        -0.3187, -0.3067,  0.2197, -0.0740,  0.0571,  0.2558, -0.2360,  0.1465,
         0.3946, -0.3444,  0.2541,  0.5983,  0.2134, -0.1504, -0.1505, -0.1984,
         0.2443,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0308e-01, -1.6769e+01, -4.0604e-01, -1.6060e-02, -4.4579e-01,
        -3.5170e-01,  1.3698e-01,  2.8460e-01,  1.9786e-01, -3.6903e-01,
         2.4432e-01, -1.7219e-01, -3.9189e-01,  6.5043e-02,  3.2719e-01,
        -1.8695e-01,  1.6395e-01, -8.7537e-02, -4.7275e-02, -1.5866e-01,
         2.9085e-02,  1.8043e-01,  1.7843e-02,  1.4746e-01, -1.4183e-01,
        -4.7222e-02,  1.5236e-03, -8.8296e-02,  1.7622e-01,  5.9082e-02,
        -2.4244e-02,  1.0688e-01,  5.5957e-02,  4.5333e-02, -1.1714e-01,
        -9.9289e-03,  1.1053e-01, -1.7135e-01, -3.6974e-01, -1.3974e-01,
         8.5675e-02,  5.0360e-02, -1.0320e-02,  2.2646e-01,  2.0638e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3265, 12.7879,  0.6619,  1.2927, -0.0654,  0.1200,  0.1584, -0.0191,
         0.0217, -0.2341, -0.4508,  0.0636,  0.0389, -0.1932, -0.1503, -0.2151,
        -0.3732, -0.2685,  0.0562,  0.0394,  0.1375, -0.1397, -0.7550,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1568, 16.5720, -1.1367,  0.3294,  0.6689, -0.0346,  0.6662, -0.1183,
         0.1183, -0.3058,  0.4655,  0.2265,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.2992e+00,  1.6759e+01,  2.1549e+00, -6.5314e-01, -3.2851e-01,
         2.8973e-01,  8.6650e-01, -2.1363e-01, -2.0093e-01,  1.6799e-01,
         5.0295e-01,  2.0257e-01, -3.6621e-01,  3.5038e-01, -1.3775e-01,
         8.1659e-03, -1.6262e-01,  3.9283e-01,  2.3833e-01, -6.8234e-01,
        -3.4195e-01, -8.0567e-03, -5.4967e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3215e-01, -1.0619e+01, -1.3831e+00,  5.0817e-01, -1.0459e-01,
         3.9822e-02, -1.0005e-01, -1.3943e-01, -6.2618e-02, -8.8273e-02,
        -3.6624e-02, -7.6590e-02,  7.2236e-02, -4.3254e-02,  1.5524e-02,
        -6.3114e-02,  4.7008e-02,  1.2458e-01,  1.0860e-01, -5.5150e-03,
        -5.5331e-02,  2.2639e-02,  5.0436e-02,  8.9305e-02,  1.4206e-01,
        -5.3771e-02, -3.9029e-02, -1.1526e-01, -1.7069e-02, -8.5451e-02,
         1.2192e-02, -4.0488e-01, -1.0866e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7483e-01,  2.2133e+01,  3.3280e-02, -1.5363e-01, -1.8764e-01,
         4.7339e-01, -7.0083e-02, -2.0116e-01,  2.4629e-01, -3.6610e-02,
        -6.2755e-02,  4.5405e-01,  2.6932e-01,  5.6522e-01,  7.3696e-01,
        -4.7512e-01, -2.5302e-03,  4.2330e-01,  1.0196e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0307, -3.5170,  0.1160,  0.0301,  0.1153, -0.0289,  0.0910,  0.0105,
         0.0053, -0.0803,  0.0410, -0.0611, -0.0253, -0.1075,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4912e-02,  1.0191e+01,  5.4152e-01,  2.4010e-01, -2.1143e-01,
        -6.5421e-02,  4.3099e-02,  3.0091e-02,  2.3227e-01,  8.0529e-02,
         8.9305e-02,  1.8363e-02,  1.4478e-01, -3.5609e-01, -1.2627e-01,
        -6.9840e-02, -1.8030e-01,  3.7681e-02,  3.1271e-02,  1.7526e-02,
         9.1980e-03, -2.5229e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1515e-01,  2.3532e+01,  1.0110e+00,  4.8308e-01, -2.1488e-01,
         3.4807e-01,  1.8332e-01, -3.9201e-02,  2.4040e-02, -9.0441e-02,
        -2.0021e-01, -6.6307e-01, -5.3293e-01, -8.6109e-02, -1.5070e-01,
         1.9219e-02,  1.3309e-01, -1.5058e-01,  1.5728e-01,  1.9985e-01,
        -3.8263e-01,  1.7766e-01,  1.9397e-02, -2.7097e-01, -2.3872e-01,
         4.4280e-02,  8.0048e-02,  2.1198e-01,  4.3458e-01,  2.1118e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0150, 11.0496,  0.0360, -0.6472, -0.0809,  0.6933,  0.0814,  0.2082,
         0.3465, -0.3126, -0.3211, -0.0478, -0.3129, -0.0421, -0.0171,  0.2869,
        -1.2568,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.9318, -26.2297,  -0.6548,  -0.4008,  -0.3026,  -0.5456,   0.5880,
         -0.1972,  -1.1332,  -1.1305,   0.6846,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7016e+00, -6.9205e+00, -7.8539e-01,  2.4141e-01, -7.9602e-02,
         1.0488e+00, -9.0290e-01, -1.1762e-01, -1.3820e-01, -9.2803e-02,
        -1.2398e-01, -1.4050e-01,  2.1838e-01, -1.7412e-01, -2.7177e-03,
         4.0348e-01, -6.3089e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1162e+00,  8.6037e+00, -1.2608e-01,  2.0130e-01, -1.0242e-01,
         1.2825e-01, -7.0145e-02,  8.1538e-02,  1.3079e-01, -3.1306e-02,
         6.5650e-02,  3.2171e-02,  3.4654e-02, -1.5233e-01, -1.1878e-01,
         5.6699e-02, -1.2507e-01, -1.0418e-03, -3.3100e-01, -1.1278e-01,
        -9.1025e-02,  1.6060e-01,  8.7550e-02,  6.1881e-02,  1.2155e-01,
        -6.5781e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6043e-01,  1.4775e+01,  1.0103e+00, -2.8657e-01,  1.4330e-01,
         9.5167e-01, -3.0541e-01,  5.5649e-01,  3.1990e-01,  6.3043e-02,
        -9.6755e-02, -3.2007e-01,  2.1533e-02, -4.5447e-02,  1.0526e-02,
         2.1478e-01, -1.4040e-01, -6.3317e-02,  3.0519e-02, -7.5982e-02,
         1.5560e-01, -6.1514e-02, -2.2611e-02,  5.8218e-02,  1.4071e-01,
        -2.1985e-02,  4.4006e-02,  5.3038e-02, -2.1057e-01,  1.6245e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8794e-01,  2.4153e+01, -3.7340e-01, -1.6905e-01, -5.1604e-01,
        -3.9029e-01,  4.9669e-03,  3.9454e-01,  8.7276e-02, -4.8390e-02,
        -4.3149e-01,  5.3977e-02,  1.4117e-01,  1.1895e-01, -3.4963e-01,
        -1.4229e-01, -2.5184e-01,  6.1752e-02,  2.2030e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 2.1209e-01,  7.7160e+00,  8.3772e-02,  2.8623e-01,  9.6074e-02,
        -2.6714e-01,  2.0243e-01, -5.9073e-02,  2.1287e-02,  3.7638e-02,
        -1.0652e-01, -7.9762e-02, -9.8278e-02,  5.7597e-02,  1.9304e-02,
        -3.7709e-01,  8.5689e-02,  3.9850e-02,  1.9877e-02,  3.2292e-03,
        -1.1949e-01, -1.1365e-01,  2.7032e-02, -5.9157e-02, -1.8571e-01,
        -9.4965e-02, -1.0254e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4253, 16.8369,  0.0449, -1.2593, -0.5984, -0.2887, -0.2170,  0.2473,
        -0.1255,  0.0787, -0.5158,  0.3905,  0.5962, -0.0771,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2720, 13.6321,  0.5134, -0.1097, -0.0475, -0.4474,  0.3898, -0.1575,
         0.2293, -0.1621, -0.1516, -0.0487, -0.2370,  0.2850,  0.0544, -0.2615,
        -0.0822, -0.1630, -0.1965,  0.2223, -0.1267, -0.0471, -0.1107,  0.1641,
         0.2952, -0.1444,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9762e-01, -5.1792e+00, -1.4644e+00,  1.3400e-03, -1.1246e-01,
         2.3292e-02,  2.0424e-02,  1.1458e-01,  4.3585e-02,  5.7195e-02,
        -7.1270e-03,  1.2073e-01, -3.1944e-01,  1.3769e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5276e-01,  1.1421e+01, -4.6751e-01, -1.2737e-01,  1.1851e-02,
        -7.9757e-02, -1.4515e-01, -2.0236e-02,  7.6126e-02, -3.6246e-03,
         2.9629e-03, -7.3933e-02,  7.3969e-02, -2.1664e-01,  1.5480e-01,
         6.6125e-02, -2.3115e-01, -1.3863e-01,  7.9238e-02,  2.8365e-01,
        -3.9292e-02, -4.3872e-02,  1.9128e-01,  1.9768e-02, -2.1708e-02,
        -2.3131e-02, -1.0825e-01, -2.2760e-01, -1.0955e-01, -4.3955e-02,
         8.0390e-02,  4.7349e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2298e-01,  1.4177e+01, -3.9982e-01, -7.2062e-01, -4.0810e-01,
        -4.6865e-01, -2.1166e-01, -2.6276e-01,  2.1925e-01, -6.4999e-02,
        -8.0222e-02,  9.0693e-02, -3.1734e-01,  9.7461e-02,  3.3867e-01,
        -7.9383e-03, -1.6527e-01, -8.7258e-02,  3.1780e-01,  1.1354e-01,
         1.7065e-02, -2.8559e-02,  2.6860e-02, -9.4613e-02, -5.1355e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1305,  8.8703, -0.0739, -0.4041, -0.1844,  0.6177,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9369e-01,  1.7844e+01,  9.0434e-01, -3.6474e-01,  4.0928e-01,
        -2.0761e-02,  2.5715e-01, -1.0069e-01,  7.4631e-02,  4.4092e-02,
        -1.4850e-01,  2.8431e-02, -2.5018e-02, -7.8081e-02,  3.3189e-02,
         1.6975e-01, -3.0348e-01, -1.4739e-01, -2.6721e-02,  8.2351e-02,
        -5.7496e-02,  2.1727e-02, -9.0919e-02, -2.3621e-02, -1.0290e-03,
         1.6918e-01, -7.4668e-02, -7.5811e-02, -7.3343e-03, -8.6709e-02,
         2.4977e-01,  7.0828e-02,  1.4108e-01, -6.5706e-03,  2.2679e-02,
         2.3375e-01,  3.3659e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8080e-01, -8.5385e+00, -4.5232e-01, -2.9816e-01,  1.7510e-02,
        -2.2429e-01,  9.6246e-03, -1.1509e-04,  2.8559e-02, -7.4954e-03,
         5.2230e-02, -5.7951e-02, -8.6760e-02,  1.9595e-02,  2.4361e-01,
         2.4747e-01,  9.0997e-02, -1.8451e-01,  5.1959e-02,  1.9224e-03,
         4.9653e-01,  3.8866e-02,  1.8739e-01,  2.5376e-02,  5.4971e-02,
         6.8679e-02,  1.8101e-01, -2.0366e-01,  6.8106e-02,  4.4289e-02,
         3.6385e-02, -1.7809e-02,  1.1315e-03, -1.9797e-01,  3.0043e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0806e-01,  9.5890e+00, -2.4110e+00,  9.6060e-02, -3.7582e-02,
        -2.7504e-02, -1.6282e-01, -6.0449e-02, -2.3723e-01,  2.5403e-01,
        -1.5931e-01,  6.1856e-03, -2.3936e-01, -7.4085e-01, -4.4067e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0285e-01,  1.9144e+01,  3.5950e-01, -6.6810e-01, -1.2278e-01,
         3.6723e-01,  6.3304e-01,  4.6921e-01,  2.1548e-02,  1.8926e-01,
        -8.2103e-02, -1.0036e-01,  5.2641e-02, -2.0606e-01,  6.4207e-02,
        -1.5373e-01,  7.7657e-02,  4.9545e-01,  6.1020e-01,  1.7754e-01,
         2.4291e-02, -9.1561e-02, -1.5976e-01, -1.3583e-01, -7.5717e-02,
         1.2396e-01, -1.3686e-01,  7.5817e-03, -1.7235e-01, -8.8213e-02,
         1.6197e-01,  3.2531e-01,  5.6520e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.1762, -20.8220,   0.1375,  -0.2787,   0.1122,   0.5414,   0.5501,
          0.1885,   0.2290,   0.2534,   0.2489,   0.4047,   0.0528,   0.3455,
          0.2028,  -0.0481,  -0.1565,  -0.3904,   0.2000,   0.0217,   0.4150,
          0.0402,  -0.1669,   0.0723,  -0.1054,  -0.3644,   0.0350,  -0.1198,
          1.2262,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.6298, 12.3999, -0.0859,  0.0499,  0.0291,  0.1096, -0.0276,  0.1871,
        -0.1185, -0.8991, -0.1176, -0.0297, -0.0711,  1.1372, -1.2260,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7454e-01,  7.1988e+00, -2.3032e-02, -1.0059e-01, -1.3409e-01,
        -1.7878e-01, -1.1412e-02,  7.5752e-02, -2.2309e-02, -2.3779e-01,
        -1.5522e-01, -1.0918e-01, -6.2543e-02, -1.3331e-01,  2.8732e-02,
         1.3523e-01,  4.8406e-02, -1.7632e-02,  1.5147e-02, -1.3275e-02,
         1.4315e-01, -1.8168e-03, -5.3010e-02,  3.1722e-02,  6.5006e-03,
         8.2058e-03,  1.0509e-01,  1.8453e-01, -4.1387e-02, -8.3497e-03,
         1.5487e-02,  8.6302e-04, -7.3334e-02, -3.6446e-02, -1.0931e-01,
        -6.8848e-03, -1.0768e-02,  1.1574e-02, -4.6463e-02,  3.1636e-02,
         8.7244e-02,  7.4771e-02,  1.2914e-02,  5.7845e-03,  4.8410e-03,
        -2.9476e-03,  7.5284e-02, -8.7200e-02,  1.4007e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5167e-01, -3.2875e+01,  7.4097e+00,  1.4452e+00,  9.3598e-01,
        -4.9582e-01,  6.3759e-01,  6.4624e-01, -2.2650e-01,  2.8249e-01,
         2.0378e-01, -3.6285e-02,  1.3414e-01, -8.3812e-03,  1.1638e-01,
         1.8352e-01, -6.2443e-01, -1.3516e-02, -7.0540e-02, -9.4336e-01,
        -1.1817e-01,  4.1999e-01,  2.1162e-01,  6.0767e-02, -2.2086e-01,
         3.1053e-01,  3.4700e-01, -1.9248e-01, -9.0466e-02,  2.8138e-01,
         2.7928e-01, -4.3808e-01, -3.7507e-01, -1.0204e+00,  3.6222e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7459, 18.9232,  0.7648, -0.2034,  0.5295,  0.3887, -0.0891, -0.0937,
         0.0366, -0.4176,  0.4850, -0.1006, -0.1398,  0.2388,  0.3008,  0.0595,
         0.0196,  0.3369, -0.6560,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.4885, -16.0010,   0.6434,  -0.0950,  -0.2694,  -0.3659,  -0.2394,
          0.0493,  -0.7407,   0.2057,  -0.3624,   1.0582,  -1.7271,   0.5959,
          1.1759,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1585e+00,  1.2082e+01, -7.7956e-01,  3.4433e-01, -7.9208e-03,
        -2.0401e-01, -6.7358e-01, -5.6071e-02,  2.9174e-01, -8.7744e-03,
         2.8791e-02, -3.1418e-01,  3.4744e-02, -3.0744e-02, -1.6300e-01,
        -1.5347e-01, -7.9510e-02, -6.5430e-02, -5.9053e-02, -1.9637e-01,
        -2.9712e-01,  5.7887e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0860e-01,  1.2406e+01, -2.7209e-01,  1.9434e-01, -7.3341e-02,
        -1.6250e-01, -3.2935e-01, -1.2334e-01, -3.3018e-01,  1.3608e-01,
        -9.5757e-02,  2.2547e-01,  4.8710e-02,  4.1892e-02,  1.7941e-01,
         9.4741e-02, -6.0134e-02,  1.8851e-02, -4.7742e-03, -3.6446e-02,
         2.3650e-01,  9.2624e-03, -2.1435e-03,  2.4713e-01,  1.4723e-01,
        -1.1558e-01,  1.6124e-01, -1.5504e-02, -1.9556e-01,  5.4981e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1320, 18.3729, -1.1059, -1.3772,  1.0110,  0.7660,  0.0885,  0.7229,
        -1.3745,  0.2466, -0.3210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3615e-01,  1.8890e+01,  2.8583e-01,  2.8078e-01, -1.1017e-01,
        -3.6268e-01,  1.8607e-01, -1.0021e-01, -1.7779e-01,  4.7524e-02,
        -3.1793e-01,  1.0335e-01,  7.0633e-02, -1.3531e-02,  5.0189e-02,
         3.8989e-01, -8.7569e-02, -1.3323e-01, -1.3502e-01, -6.8489e-02,
         8.0901e-01,  9.1134e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1549e-02,  9.2440e+00,  6.3730e-01, -2.5513e-01,  1.1438e-01,
         1.4187e-01,  2.0806e-01,  5.6789e-02, -4.8822e-03,  5.6788e-02,
        -1.1119e-01, -1.0838e-01,  8.3863e-02, -1.1943e-01,  1.5148e-01,
         1.2308e-01, -3.7719e-02, -2.6715e-02,  5.3945e-02,  4.5626e-02,
        -5.0150e-02, -1.3324e-02, -1.7022e-01, -8.6638e-02,  1.6787e-01,
        -2.7590e-02,  1.1639e-01, -3.3112e-02, -2.5380e-01, -9.8775e-02,
        -3.2873e-03,  4.6514e-02,  6.7721e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6949e-01, -2.8885e+01, -1.3226e+00,  2.2611e+00, -1.6758e-01,
        -6.6532e-01,  2.7560e-01,  3.0008e-01, -5.4198e-01,  4.5594e-01,
         2.0534e-01,  1.8800e-01, -1.9868e-02,  7.4942e-02,  4.9445e-01,
        -7.7783e-01, -1.0844e-01, -4.1910e-01,  6.8440e-02, -1.5988e-01,
        -2.6219e-01,  4.9874e-01, -3.2299e-01,  7.3153e-02,  2.1924e-01,
         6.0911e-02,  8.7771e-01,  8.7446e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5843, -28.3190,  -3.0571,  -0.4210,  -0.2399,   0.4161,  -0.4615,
          0.0547,   0.6750,  -0.1044,   0.1099,  -0.1098,  -0.3248,   0.0331,
          0.0970,   0.4139,   0.3633,   0.3594,   0.1558,  -0.1370,   0.1178,
         -0.1923,   0.1663,   0.9170,   0.9292,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-5.3440e-01,  9.7226e+00,  4.5187e-01, -1.7766e-01, -2.5219e-01,
         1.2655e-01,  3.4317e-02, -3.4190e-01, -3.6435e-02, -1.0404e-01,
         4.4181e-02, -6.2624e-02,  8.7596e-02, -6.8241e-02, -7.4407e-02,
        -1.1856e-01,  5.5273e-02, -8.9722e-02,  4.5606e-02, -2.4082e-01,
         9.3788e-02, -7.9992e-03, -2.3875e-01, -2.4941e-01, -1.2758e-01,
         7.9247e-02, -9.8360e-02, -1.7532e-01, -6.2328e-02, -1.0495e-02,
        -1.4856e-02, -3.9491e-02, -1.3707e-02,  4.0726e-02, -2.6323e-02,
        -4.0905e-02, -4.0224e-02, -5.1623e-02, -3.2773e-02,  5.8335e-01,
         2.9083e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8298e-01,  2.1795e+00,  1.5184e-02, -8.7662e-02,  4.3784e-03,
         1.3597e-02, -2.5225e-02,  1.1558e-02, -7.8853e-02, -1.9679e-02,
         4.1373e-02,  1.8988e-02, -2.0492e-02,  9.8279e-04,  8.4391e-03,
        -4.1140e-03, -4.3607e-02, -4.0374e-02, -6.0463e-03,  9.4333e-03,
        -1.2006e-02, -2.0613e-02,  5.7822e-04, -1.3066e-01, -1.8086e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6100, 10.3750, -1.2109, -0.8105,  0.0361, -0.1599, -0.3211, -0.1077,
        -0.4776, -0.7275, -0.4798,  0.1380, -0.0910, -0.0165, -0.2132, -0.0216,
         0.0789,  0.3192, -0.3312,  0.0142, -0.4074,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6029e-02,  1.2519e+01,  5.9274e-02, -1.3132e-01, -9.9263e-02,
         2.8156e-01,  4.6978e-01,  2.4259e-01, -8.9749e-02, -1.5563e-01,
         5.8072e-01,  1.6422e-02,  2.5723e-03, -6.4458e-02, -9.9652e-02,
        -2.6837e-01, -2.6424e-01, -1.2047e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0156e-02,  2.3538e+00,  1.9482e-01,  9.4123e-03,  1.1985e-01,
        -6.1076e-02, -4.3203e-02, -4.3297e-02, -1.5617e-01, -1.3338e-02,
         6.0393e-02,  2.9859e-02, -2.8660e-02,  1.8038e-02,  7.6053e-03,
         2.2730e-02, -6.4013e-02, -1.4603e-02,  4.0307e-02, -3.0364e-02,
        -4.9371e-02, -4.4086e-03, -2.1563e-02, -2.0065e-02,  2.2624e-02,
        -3.2076e-02, -2.4233e-02, -6.7412e-03, -5.4184e-02,  6.6583e-02,
        -1.8048e-02,  1.7587e-04, -2.5464e-02, -6.6955e-02, -3.9878e-02,
         4.9695e-02, -2.2123e-02,  4.7724e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5448, 19.5154,  0.2291,  0.5741, -0.2074,  0.0599,  0.2191, -0.1957,
        -0.1764,  0.4180, -0.1716, -0.2332, -0.1816, -0.6047,  0.1397,  1.3179,
        -0.1021,  0.5020, -0.5535,  0.7730,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.5214, -41.9310,  -3.5312,   2.4647,  -0.5287,  -1.4786,   0.2561,
         -1.2441,   0.1526,  -0.9034,   3.5913,   2.4193,  -2.1495,  -1.4733,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0415,  1.7822, -0.0472,  0.0633,  0.0163,  0.0405, -0.0369, -0.0158,
        -0.0178, -0.0537,  0.0201, -0.0681,  0.0293,  0.0196, -0.0061, -0.0900,
         0.0255, -0.0067, -0.0193, -0.0184, -0.0104,  0.0748,  0.0210,  0.0032,
         0.0419, -0.0328,  0.0139,  0.0199,  0.0313,  0.0025, -0.0103, -0.0376,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5309e+00,  1.6618e+01,  8.2881e-01,  1.3245e-01, -6.0421e-01,
         2.8353e-01,  5.9615e-01, -5.5919e-01,  2.0261e-01, -3.2218e-01,
         8.0971e-02,  1.1301e-01,  1.1020e-02,  1.4460e-01,  2.1323e-01,
         1.6582e-01, -1.0629e-01, -3.1476e-01, -3.0584e-02, -2.3810e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3672e-01,  4.0844e+00,  2.4375e-01, -2.9563e-01,  1.3408e-01,
        -9.3282e-02, -5.6622e-02, -6.4542e-02, -7.7738e-02, -1.2021e-01,
         3.1859e-03, -7.2834e-02, -1.7444e-02, -3.4280e-03,  4.9968e-03,
        -7.9540e-02, -5.9081e-02,  9.0728e-03,  2.7414e-02,  6.7177e-03,
        -7.3521e-02, -1.4594e-02,  5.2277e-02, -7.7299e-02, -1.1485e-01,
        -9.0902e-02, -1.1332e-01, -1.3015e-01, -1.2714e-01, -6.3520e-02,
         5.4775e-03, -4.8975e-03,  7.6647e-02,  2.0726e-02,  1.6887e-02,
        -4.0305e-04, -9.0441e-03, -2.9632e-02, -4.4187e-02, -7.1010e-02,
         6.6465e-02,  2.1859e-02, -5.6641e-02, -6.3087e-02, -1.3350e-01,
         5.5134e-02,  1.0490e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3514,  9.1114,  0.1476, -0.1933,  0.3373,  0.1171, -0.1657,  0.0717,
        -0.5755, -0.1519, -0.1270,  0.0640, -0.0996,  0.1090, -0.3542,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0307e+00,  2.7486e+01, -5.9862e-01, -2.6207e+00,  1.4429e-01,
         5.9826e-01,  9.2016e-01,  1.1714e+00, -1.2759e+00,  5.1472e-01,
        -1.6367e-01,  5.6657e-01, -4.2963e-01,  1.6293e+00,  1.1279e+00,
         3.4594e-02, -8.8456e-01, -9.7186e-01, -3.0611e-01,  1.1371e-01,
        -8.3476e-02,  3.6896e-02, -2.2726e-02,  1.8838e-01,  8.7008e-01,
        -2.2364e+00,  5.5392e-01,  1.0917e+00,  3.2824e-01,  6.1957e-01,
         6.9136e-01,  2.9852e-01,  4.5891e-01,  1.0496e-01, -2.4387e-01,
         1.2200e-01,  4.6956e-01,  1.1762e+00,  3.1333e-01,  3.1897e+00,
         1.8506e+00,  1.6183e+00, -2.2712e-01,  2.7018e+00, -3.0001e-01,
        -5.5872e-01, -7.1564e+00,  7.1816e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 3.7834,  6.4240,  0.5413,  0.1461, -0.2798, -0.0252, -0.1214, -0.1759,
        -0.3343, -0.0438,  0.1899, -0.1923,  0.8063,  0.0731, -0.1107,  0.4278,
        -0.3831,  0.4395,  0.2536, -0.0937, -0.0978, -0.2846, -0.2343,  0.3470,
         1.3127, -3.5391,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6229e-01,  2.0216e+01,  1.8539e+00, -7.2700e-01, -9.8355e-02,
         5.3038e-02,  5.2333e-01,  1.8012e-01, -5.9067e-01, -2.3354e-01,
        -1.3191e-01,  5.6862e-02, -2.6281e-02, -2.0141e-01, -4.5962e-01,
        -2.8155e-02,  6.6531e-02,  1.6713e-02,  1.4026e+00,  1.9166e-03,
         1.1543e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-16.3218,  17.0255,  -1.5555,   1.4541,  -0.6124,  -4.9869,   1.9623,
          1.3643,   0.5967,  -0.9003,   1.0572,   0.1327,  -2.5563,   0.5700,
         -1.8755,  -0.8727,   2.7890,  -3.8940,  -0.1888,  -3.2534,  -0.3169,
         -0.7465,   3.6907,  -1.9837,   0.4492,   0.8003,   0.3235,   0.1199,
         -0.2526,   0.2210,  -0.4859,   0.1509,   2.5899,  -0.8321,  -2.9795,
         -6.7529,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2356, 11.4363,  0.3471, -0.3634, -0.0611,  0.8396, -1.9360, -0.1853,
        -0.1534, -0.0169,  0.0122, -0.1753, -0.4122, -0.6935, -0.7147,  0.0822,
        -0.2172, -0.3833, -0.5649,  0.0642,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6336e-02,  2.0017e+01,  1.3558e+00,  5.8253e-01,  3.8882e-01,
        -2.6549e-01,  1.4522e-02, -7.7373e-01, -1.2563e-01, -8.5503e-02,
         8.9104e-01, -7.3211e-02, -7.9798e-01, -1.4866e-01, -9.4698e-02,
        -1.4718e-01, -3.1647e-01, -2.3450e-01,  3.1328e-02,  1.0521e-01,
        -2.4475e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2218e-01,  1.2210e+01,  8.3196e-02, -1.4182e-01,  3.1658e-01,
         5.5670e-03, -2.3974e-01, -2.3014e-01,  5.4523e-01, -7.6030e-02,
         1.4181e-01,  3.0029e-01, -6.9928e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2221e+00,  2.9136e+01,  3.7315e+00,  1.2638e-01, -1.0804e+00,
        -3.4109e-01, -7.4178e-01, -2.6903e-02, -4.6024e-02,  2.5174e-01,
         2.1525e-01,  1.1836e-01, -1.9827e-01, -3.0161e-01,  6.2546e-01,
         5.3331e-01, -3.5025e-02, -5.7752e-02,  1.9664e-01,  1.0378e-01,
         1.8052e-01,  7.9905e-01,  9.1489e-02,  3.6423e-01,  1.0261e-01,
        -3.5154e-02, -9.6909e-01, -6.5313e-01,  4.7187e-02,  2.4478e-01,
         2.5706e-01, -1.0065e-01, -2.0756e-01, -1.0644e-02, -1.4719e-01,
        -2.8475e-01,  3.2397e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4328,  7.7071,  0.8862, -0.2454, -0.4342, -0.2035, -0.2137, -0.1819,
         0.0300, -0.1188, -0.2621, -0.5118, -0.0424, -0.0698, -0.1688,  0.1076,
         0.2482,  0.0383, -0.0932,  0.1470, -0.1258, -0.1217, -0.4636,  0.3135,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9876,  6.7430,  0.1277, -0.4431, -0.1643, -0.0976, -0.3152,  0.1244,
        -0.2966, -0.1778, -0.1452,  0.5960,  0.0803, -0.2679, -0.0662, -0.2199,
        -0.6268, -0.6197, -0.2799, -0.1790,  0.0432, -0.3971, -0.1186, -0.1929,
        -0.2424, -0.1139,  0.1093, -0.0633, -0.0642,  0.0455, -0.6372, -0.0741,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0393e-01,  1.0667e+01,  1.0520e+00, -2.3965e-01,  2.7633e-04,
         1.7286e-01, -2.5622e-01, -3.4632e-01,  1.0279e-01, -3.4842e-02,
        -1.2246e-02, -1.0864e-01, -1.4766e-01, -5.3473e-02, -1.7519e-01,
         6.7305e-02, -7.3889e-02, -1.6324e-01, -6.8538e-02, -2.6245e-02,
        -3.4604e-01, -1.6976e-01, -3.7407e-02, -3.5052e-02,  5.6993e-02,
        -2.2234e-01, -2.4828e-01, -1.0001e-01,  1.8144e-02, -5.4510e-01,
         1.7625e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1107,  5.6333, -0.5883,  0.1708,  0.1788,  0.1975,  0.1004, -0.2960,
         0.0928,  0.0658,  0.0617,  0.0677, -0.0982, -0.0578, -0.0193,  0.1539,
        -0.0149,  0.0889,  0.0233, -0.0622, -0.3403,  0.4067,  0.3353,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-10.4322,   3.5445,   3.2433,   2.1568,  -4.6697,  -2.0728,   2.2560,
         -1.2285,   2.3185,  -4.7727,  -2.1210,  -0.3512,   0.7265,  -0.6888,
         -4.0420,  -1.5464,  -3.7219,  -3.4379,  -0.7878,   0.2885,  -0.5687,
         -1.2890,  -2.0093,  -1.5749,   5.8693,  -3.8280, -20.3351,   4.5952,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.9265e-03,  1.7543e+01,  8.2831e-01,  1.2798e-01, -1.3155e-02,
         8.0203e-02,  2.0037e-01, -1.9239e-01, -3.4588e-02, -6.3755e-02,
         1.6401e-01,  1.0967e-01, -7.8477e-02, -2.0037e-01, -6.9955e-02,
        -3.7270e-01, -7.7653e-02, -6.7234e-02, -1.1145e-01, -3.3472e-01,
         1.9281e-01,  9.8230e-02, -1.5397e-01, -3.2480e-02,  5.2049e-02,
         2.2871e-02,  1.7225e-01, -2.6429e-01,  3.8274e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.3666, -10.8832,   0.8026,   0.0110,   0.4697,   1.2971,   0.1991,
          0.8318,   0.1830,   0.2707,   0.2919,   0.3525,   0.5316,   0.3543,
          0.2994,  -0.0504,  -0.1712,   0.0150,   0.0133,   0.0816,   0.1630,
         -0.6500,   0.7480,  -1.2649,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7675e-01,  7.0234e+00,  7.8804e-02,  1.6728e-01,  4.3493e-01,
         1.6453e-01,  1.2280e-01,  4.3834e-01,  3.0878e-01, -1.8793e-01,
        -5.3182e-02,  3.0899e-01, -1.6123e-01, -1.7291e-01,  5.1995e-02,
        -6.7746e-02, -1.9619e-01,  1.9052e-02,  3.7614e-01, -7.0450e-02,
         1.5542e-02, -2.0130e-01,  4.8021e-03,  2.5511e-01,  9.9813e-02,
         6.4929e-03, -6.1036e-03,  4.3380e-02, -6.5295e-02,  5.4771e-02,
        -1.4808e-01,  3.0393e-01, -3.0243e-01, -2.9460e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2034e+00,  4.3871e+01, -1.9743e+01, -3.5517e+00,  7.4224e-01,
        -6.6306e-01,  7.8895e-01,  4.4874e-01,  5.8520e-01, -2.1306e+00,
        -1.1778e+00,  4.2947e-01, -9.1787e-01, -2.6639e-01, -2.0096e+00,
        -1.9288e-02,  6.9750e-02,  1.5494e+00, -2.3394e-01,  2.4276e+00,
        -2.4508e+00,  1.4475e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1735, -5.6955,  0.5701,  0.0720, -0.1685, -0.0482,  0.0368,  0.0474,
        -0.0604,  0.1465, -0.2203,  0.1777,  0.0523,  0.0337, -0.0154,  0.0828,
         0.9188, -0.4077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3666e-02,  3.0078e+01,  1.7688e+00, -3.2533e-01, -4.4561e-01,
         1.3960e+00, -5.7534e-02, -3.0021e-01, -2.0609e-01, -7.3543e-02,
         1.0488e-01, -1.1155e-01, -5.4789e-01, -6.3474e-01,  3.2864e-01,
        -1.2411e-01, -1.6349e-01, -1.0836e-01, -4.6412e-01, -4.4614e-02,
         3.2680e-03,  1.3086e+00, -2.3701e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5016e+00,  2.2991e+01,  2.1273e+00,  2.8650e-01,  4.8134e-04,
        -1.5866e-01, -3.6275e-01, -2.1921e-01,  1.8175e-01,  2.4088e-01,
         1.9138e-01,  6.8635e-02,  8.4825e-02,  1.9597e-02,  7.5264e-02,
        -9.7140e-02, -2.9188e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.2314, -11.4912,  -0.0724,  -0.3461,  -0.7943,  -0.8029,  -0.0115,
         -0.4794,   0.2026,  -0.0250,  -0.0970,  -0.1611,  -0.3178,  -0.3237,
         -0.3765,   0.0129,   0.1395,   1.4552,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8386, 13.0594, -0.3001,  0.2300, -0.4717, -0.0460, -0.1773, -0.5984,
        -0.2122, -0.1091, -0.2219,  0.0803, -0.1744,  0.0148, -0.0961, -0.4225,
         0.0325, -0.0647,  0.0651,  0.0575,  0.0163,  0.0636, -0.0539,  0.2930,
        -0.4569, -0.6080,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0334e-02, -1.9598e+01, -1.7786e+00,  4.4564e-01, -7.7171e-01,
         3.9308e-02,  9.4433e-01, -4.2614e-02, -1.3995e-01,  4.1366e-02,
         4.5514e-01, -2.3805e-01,  5.2843e-02,  1.2154e-01,  6.0627e-01,
         3.6656e-01, -8.1848e-02,  4.7659e-01,  4.7652e-01,  1.7659e-01,
         3.8353e-01, -3.0889e-01,  3.9089e-01, -4.5379e-01,  9.8575e-03,
        -1.0419e-01, -1.4910e-01,  1.7059e-02, -3.1221e-01,  7.7745e-01,
        -1.4611e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8465,  8.1255,  0.2604,  0.0907,  0.4637,  0.0105,  0.1749,  0.0138,
         0.2222,  0.1437, -0.3031, -0.1286, -0.0986, -0.0747,  0.2610, -0.3767,
        -0.5229, -0.0619,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.7936, -11.6981,  -0.1918,   1.2245,   1.6806,  -0.7639,  -0.7776,
          0.7612,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 5.1708e-01,  2.6636e+01,  1.0290e+00, -7.5987e-01, -2.3283e-01,
         1.0272e+00,  8.0427e-01,  1.5783e-01,  2.3271e-01,  6.8750e-02,
        -5.4556e-01, -2.1002e-01,  1.3266e-01, -3.4713e-01, -2.9781e-02,
         1.4010e-01, -2.1328e-01, -1.2269e-01,  1.4851e-01,  4.8023e-01,
         4.7591e-01,  5.1634e-01,  1.7995e-01,  7.8653e-02, -1.7233e-02,
        -4.5927e-02, -1.9275e-01,  4.3652e-02,  9.2118e-02, -2.8147e-01,
        -3.4102e-02,  5.3490e-02, -3.2230e-01, -3.2627e-01, -2.6933e-01,
         1.1209e-01,  1.7504e-01,  5.3422e-01, -1.2837e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2765e-01,  2.6539e+01, -2.9035e-01,  1.2575e+00,  5.1761e-01,
         7.0210e-01, -1.0363e-02,  3.2324e-01,  5.3765e-01, -3.9708e-01,
         1.5687e-01,  8.9914e-01,  8.8431e-01,  3.4163e-01,  4.5258e-01,
         5.0272e-01, -1.5409e-01, -5.8590e-02, -4.4224e-01, -1.3517e+00,
        -1.6984e-01,  4.8714e-01, -2.1996e-01,  6.0556e-02,  3.6482e-01,
        -7.0420e-01,  2.4237e-01, -1.6707e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.6520, -10.3710,  -0.7996,  -0.4353,  -0.3108,  -0.1813,   0.1987,
         -0.1554,  -0.2187,  -0.1202,   0.0391,  -0.2654,   0.3161,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.8473, -13.0037,   0.8718,   0.3910,   1.0222,   1.0129,   0.0787,
          0.9620,  -1.5188,   1.1229,   0.0391,   0.6191,   0.3809,   1.6991,
         -0.6379,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4824e-01,  1.2003e+01,  3.0117e-02, -1.1692e+00,  1.2982e-01,
         1.0950e+00,  1.5250e-01,  5.3388e-01,  3.2437e-01, -4.7637e-01,
        -3.9507e-01, -3.5492e-01, -1.3033e-01,  5.7872e-01,  2.8108e-01,
         1.5034e-01,  1.0263e-01, -3.2799e-02,  2.4354e-02, -2.3865e-01,
         7.6417e-02, -1.3973e-01,  6.7938e-02,  2.6069e-01,  3.9798e-01,
         6.5378e-02, -2.2469e-01,  2.4929e-02, -1.0289e-01, -1.2810e-01,
        -4.4837e-02, -7.1069e-02, -1.0724e-01, -2.5273e-02,  4.5659e-01,
        -1.7718e-01,  9.1833e-02, -4.8060e-02,  4.9301e-02, -8.8635e-03,
         8.9336e-02, -8.1380e-02, -1.2865e-01, -5.6487e-02, -2.8826e-02,
        -1.6557e-01, -4.2658e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8399e-01,  2.3757e+01, -6.3623e-01, -7.1963e-02, -4.6385e-01,
         2.3545e+00, -5.7336e-01,  2.4765e-01,  6.2994e-01,  2.5829e-01,
        -3.2891e-01, -2.7161e-01, -2.6926e-01, -1.4140e+00,  1.5801e-01,
        -1.1983e-01, -1.6150e-01, -1.7425e-01, -9.7378e-01,  9.8922e-02,
        -1.3349e-01,  2.3577e-01,  3.4255e-02, -8.7582e-02,  4.4026e-01,
        -5.7894e-01, -1.5435e-01,  4.6909e-02, -3.6669e-01,  1.0988e-01,
         6.6331e-02, -1.5037e-01, -2.9584e-01, -9.0634e-01,  7.4796e-02,
         1.0434e+00, -2.7643e-03, -1.1864e-01, -3.3582e-01,  9.3016e-01,
         1.6179e+00,  2.9047e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2721e+00,  8.8154e+00,  1.1113e+00,  2.7078e-01,  6.3125e-02,
         1.2233e-01, -5.0367e-02,  1.8481e-01, -7.2088e-02,  1.1778e-01,
         4.1388e-01, -5.7458e-01,  3.8458e-02,  2.4339e-02,  4.6093e-03,
         7.8892e-02, -1.1719e-01,  5.3214e-02,  1.5248e-01, -2.4004e-01,
        -1.7850e-01, -2.0666e-01, -9.4470e-02,  1.9159e-01, -1.3371e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5726, 10.2469,  0.2930,  0.2819, -0.2968, -0.0311, -0.1168, -0.1108,
         0.1194, -0.2018, -0.2591, -0.2111, -0.1907, -0.6691, -0.7385, -0.2768,
        -0.1170, -0.0136, -0.2746, -0.3276, -0.3325, -0.2716,  0.0731,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2387,  5.6944,  0.4733, -0.3388,  0.1502,  0.2422, -0.0850, -0.2800,
        -0.1942,  0.1403,  0.2615, -0.1448, -0.0361, -0.1476, -0.0496, -0.0817,
        -0.0589, -0.0646,  0.0373, -0.0615,  0.0316, -0.3999, -0.3421,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  4.3778, -17.8090,   1.7328,   1.6007,   0.6742,   0.3497,  -1.1079,
         -0.0651,  -0.5695,   0.3961,  -0.2596,   0.9787,   1.0661,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9150e+00, -4.0734e+01, -2.0443e+00,  3.0839e+00,  3.5627e+00,
        -1.8996e-01, -2.2964e+00,  1.5866e-02, -3.1777e-01,  2.8222e-01,
         9.0821e-01, -6.0276e-01,  2.6707e+00,  2.3640e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.3295, -18.4186,  -2.5296,  -0.5951,  -0.7024,  -0.3158,  -0.3809,
          0.0570,   0.0923,  -0.0394,  -0.1961,   0.2215,   0.0215,  -0.2143,
         -0.1945,  -0.0215,   1.0296,  -0.3515,   0.2019,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.0292, 14.3578, -0.6089,  0.0927,  0.3316,  0.4338, -0.0982, -0.3827,
         0.5247, -0.0885,  0.1926, -0.2261, -0.1168, -0.0886,  0.1087, -0.8608,
        -0.1719, -0.1435, -0.1005,  0.4522,  0.7385, -0.1057, -0.0799, -0.1492,
         1.1340, -1.0861,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8291, 13.6662,  0.6769,  0.1364,  0.2712, -0.2493, -0.3514, -0.3682,
        -0.1235, -0.4868, -0.0536, -0.2316, -0.3195,  0.3331, -0.3133, -0.5448,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3614,  8.9762, -0.3836,  0.9253, -0.0703,  0.4273,  0.4620, -0.0611,
         0.1568, -0.1938, -0.3742, -0.8937,  0.1018, -0.4895,  0.0352, -0.2291,
         0.5114,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3068, 48.0432, -1.9836, -3.0100, -0.3172, -1.8311, -0.0721, -3.6698,
         3.6252,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3074e+00, -2.0512e+01,  8.9200e-01,  9.5982e-01, -1.8964e+00,
        -2.9504e-01, -1.2666e-01, -9.0241e-01,  8.3516e-01, -2.7035e-01,
         1.5102e+00, -9.3162e-02,  7.6876e-01,  4.9060e-01,  1.6561e+00,
         4.9958e-01,  5.8970e-01,  8.5193e-01,  2.8461e-01, -1.9445e-02,
        -1.7438e-01,  9.0958e-01,  2.9143e-01, -7.0901e-03,  1.8256e-01,
         3.5647e-01, -8.4789e-01,  1.0133e+00, -3.3207e-01,  7.8691e-01,
         5.8719e-01,  2.0943e-03,  9.9250e-02,  6.2287e-01, -1.9351e-01,
         1.1352e+00,  5.3846e-01, -1.0030e-01,  4.0512e-01,  5.0976e-01,
         4.1427e-01, -4.3361e-01,  7.0514e-01,  1.0238e+00,  4.4065e-01,
        -3.8043e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9462e-01,  2.4711e+01, -1.5659e+00,  6.1276e-01,  4.6949e-01,
         7.8702e-01,  8.5529e-01, -1.0857e+00, -7.3942e-01,  4.0583e-01,
        -5.7173e-01, -5.1045e-01, -2.7073e-01, -5.7580e-01,  9.8231e-04,
         1.0649e+00, -6.1131e-02, -7.0568e-01, -3.5311e-01,  4.8967e-01,
        -1.3926e-01, -4.6408e-01, -4.8985e-01, -2.5729e-01, -5.1788e-03,
        -9.4723e-02, -6.0219e-02,  6.8323e-02,  3.2140e-01,  1.8389e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2752e+00, -2.4977e+01,  4.4208e-03,  5.0662e-01,  6.6774e-01,
        -5.6921e-01, -2.6472e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2185, 23.1401, -0.5621, -0.8357,  0.3470, -0.9844, -3.1876,  0.5794,
         0.7171, -0.0579,  0.2068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3000, 43.6563,  5.0731,  5.0131,  1.4872, -1.1543, -0.6196,  0.3769,
         0.4015, -1.0356, -0.0823,  0.3702, -7.3226,  0.5154,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9942, 37.5574,  1.4919,  1.1676,  1.5063,  0.3939, -0.1087,  0.2719,
        -0.0737, -0.8063, -0.0684, -0.7088, -1.1301, -1.7231, -0.5154,  0.1074,
        -0.8838, -0.3513, -0.3789,  0.4860, -1.4771, -0.2971, -0.4218,  0.4806,
        -0.7228, -0.2567, -0.5465, -0.2932, -0.7613, -0.2283, -0.0461, -0.9927,
        -2.8665,  2.8217,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4315e+00,  1.9917e+01, -9.7381e-01,  1.6843e+00,  1.0486e+00,
         2.9219e-01,  6.5072e-01,  1.3457e-01,  2.5243e-01,  5.9004e-01,
         9.2575e-02, -7.1961e-02, -2.0373e-01,  2.0290e-02, -9.0044e-01,
        -1.0852e-01, -5.9991e-01, -1.4406e+00, -5.5047e-01,  1.4104e-02,
         4.7575e-01,  7.7606e-03,  2.0519e-01, -1.0204e-01,  1.1370e-01,
         9.8863e-02,  1.5197e+00, -5.9782e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1313e+00,  1.3725e+01,  1.7185e+00, -2.1044e-01, -6.0602e-02,
        -6.8636e-01, -3.0820e-01, -2.1885e-01, -1.9649e-01, -4.4790e-01,
        -1.7765e-01, -2.0550e-01,  1.6397e-01,  2.1033e-01,  5.3590e-01,
        -4.4889e-01, -3.0825e-01, -4.1844e-01,  2.3628e-01, -3.1230e-03,
         1.9516e-01, -2.2407e-01, -2.4409e+00,  2.0230e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.5942,  8.4075, -0.6226,  0.0614, -0.3690, -0.0968, -0.0918, -0.0871,
        -0.0842,  0.1306, -0.1369, -0.2291, -0.0242,  0.1557, -0.0899,  0.1374,
         0.0417, -0.0509, -0.0385,  0.0786,  0.0789,  0.0970, -0.1041,  0.0513,
        -0.0649,  0.0834, -0.1519, -0.0623,  0.0617, -0.4712, -0.4147,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.6260, -14.8684,   0.1038,   1.0442,   0.3575,  -0.5733,  -0.1656,
         -0.0387,  -0.1099,   0.3794,  -0.4812,   0.6183,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -4.2699, -33.0772,   0.3144,   0.5500,  -1.2616,  -0.8691,  -0.3602,
          0.0554,  -0.2817,  -0.2621,   0.5186,   1.2098,  -1.0028,   3.0189,
          1.8771,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1583e+00,  3.4311e+01, -1.9703e+00, -3.9684e-01, -4.1574e-03,
        -2.2372e-01,  4.9023e-01,  1.0010e+00,  1.5040e-01, -2.9814e-01,
        -5.0851e-01,  4.6876e-01, -1.0488e-01, -4.6157e-01, -1.6572e-01,
         4.9704e-01,  2.0591e-01,  5.6578e-02, -4.1273e-01,  2.0586e-01,
         2.1579e-02, -7.5992e-02, -1.0358e-01, -8.4110e-02,  1.9640e-01,
        -1.1913e-01, -3.0103e-01,  4.7539e-02,  2.7666e-01,  8.3549e-02,
         7.4304e-01,  6.3681e-01,  1.2075e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.9330, -41.0822,   0.6137,   1.0891,   0.5484,   0.6874,  -0.8552,
         -0.0857,  -0.4599,   0.4182,   1.0111,   0.1424,   0.9847,   2.7984,
         -0.8570,   0.5548,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8488, 20.1327, -3.1011,  0.6541,  1.1521,  1.5647,  0.9567,  1.0795,
        -1.6301,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9478, 16.4018,  4.9985,  2.0307,  1.9252,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3144e+00, -5.9472e+01, -7.5442e+00,  1.8628e-01,  3.7803e-01,
        -1.7119e+00,  1.0759e+00, -1.8404e-01,  4.5125e-01,  1.1425e+00,
         1.2190e+00, -6.7326e-01,  1.1623e+00,  1.1981e+00,  1.5069e-01,
        -2.1987e-01,  3.6452e-01, -2.8044e-01, -1.2723e-01,  7.2377e-02,
         3.0332e-02,  7.9625e-02, -3.2652e-01, -2.8619e-01, -8.8184e-01,
        -4.3395e-01, -1.7099e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9932, 20.0449,  0.2769,  0.3614, -2.5384,  0.6261, -1.6860,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4199e-01, -5.1626e+01,  2.4976e-01,  4.7398e-01, -3.5174e-01,
        -3.3054e-01, -1.7843e+00,  3.5527e-01,  9.2872e-01,  2.6313e+00,
        -6.9566e-02,  1.0769e+00,  4.0135e-03,  4.0938e-01,  1.0956e+00,
         9.8062e-01,  2.0652e+00, -3.1370e-01,  6.8707e-01,  3.1685e+00,
         3.2695e+00, -1.5498e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1290, 13.1929,  0.6442, -0.0996, -0.4880, -0.3096, -0.0936, -0.1154,
        -0.2155, -0.4570, -0.0509, -0.3680, -0.5084, -0.3624, -0.1204, -0.0810,
        -0.0314, -0.0645, -0.0961,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8479e-01,  6.9862e+00,  3.5915e-01, -4.6573e-01,  9.7139e-02,
        -3.8347e-02, -5.1749e-01,  1.4999e-01, -8.4701e-02, -1.0051e-01,
        -9.9943e-02, -5.6917e-02, -7.2254e-02,  6.8408e-02, -5.7832e-03,
        -1.4680e-01, -5.4009e-02,  2.6932e-02, -2.9268e-02,  1.0256e-01,
         1.6945e-02,  1.1689e-01,  1.4725e-03,  3.2408e-02, -5.5850e-02,
        -2.6532e-02,  1.8007e-02, -8.5730e-02,  3.5772e-02, -2.8784e-01,
        -1.1085e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-3.2876e+00, -6.4969e+01, -8.3005e+00,  4.0325e+00,  3.7931e-01,
         9.3941e-01, -2.8024e-01,  3.5059e-01, -3.0917e-02, -3.5018e-01,
        -3.4337e-01, -5.9427e-01,  1.1120e+00,  8.8724e-01, -6.0120e-01,
        -4.4919e-02,  2.3246e+00, -3.5459e-01,  1.3896e+00, -4.5705e-01,
        -4.5113e-01,  1.5555e-01,  1.1035e-01,  1.3371e+00, -5.7135e-01,
         5.5213e-01,  2.6760e-01,  1.4404e+00,  1.1632e+00,  2.1825e+00,
        -1.4050e+00,  1.8565e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.7801, -23.9273,   1.4293,  -0.2487,   0.4287,   0.5749,   1.3358,
         -0.5697,  -0.5844,   0.1109,   0.0832,   0.3035,   0.1247,   0.1676,
          0.4830,   0.6924,   0.0300,  -0.0703,   0.0790,  -0.6323,   0.2427,
         -1.2354,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6321e-01,  2.2989e+01,  1.6554e+00, -7.3937e-01, -3.1300e-01,
         1.5861e-01, -1.5371e-01,  9.2202e-01, -4.1625e-02, -4.0508e-01,
        -8.4575e-02, -9.0557e-02, -1.2995e+00, -8.5017e-01, -2.1239e-01,
        -1.4533e-01,  7.4357e-02,  1.2853e-01, -2.8644e-01, -2.4678e-01,
        -4.6934e-01, -3.0315e-01,  1.6939e-02,  1.5440e-02, -2.2403e-01,
         5.8203e-01, -6.5627e-02, -1.1887e+00,  2.8559e-01, -2.8787e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8756e-01,  6.2111e+00, -1.0348e+00, -1.2870e-01, -4.8487e-02,
        -1.6445e-01,  2.8121e-01, -9.4084e-02,  8.2778e-02, -7.4015e-03,
        -2.4989e-01,  4.0807e-02,  6.3871e-03,  9.4539e-02,  1.6054e-01,
        -3.9784e-01, -1.4081e-01,  5.9967e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9021e-02,  2.6230e+01,  2.6442e+00,  4.4811e-01,  5.3762e-02,
        -2.9641e-01,  9.1506e-02, -3.8693e-01, -7.6579e-01,  3.0893e-02,
        -3.3710e-01, -2.7287e-01, -3.9021e-01, -5.7762e-02,  1.2193e-01,
         7.6682e-01,  2.7357e-01, -6.9752e-01, -3.8571e-01, -1.2498e-01,
        -2.8640e-01, -7.8393e-01, -3.1403e-01, -2.6408e-01, -3.0614e-01,
         1.2545e-02, -4.3605e-02, -7.7306e-02,  2.9162e-02,  1.7427e-01,
        -1.7214e-01, -2.6270e-02, -1.1542e-01, -4.0578e-01,  6.6176e-02,
         3.5806e-01,  2.7234e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1177, 47.2084, -8.0937, -4.3056, -1.5692,  0.1037, -0.5848, -0.3295,
        -0.6763,  0.2430, -0.5143, -0.3126, -0.1793,  1.8901, -0.3696,  0.0562,
        -0.7333,  1.9995, -0.2202,  0.5627, -0.0909,  0.6012, -3.6244,  0.1106,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0592e+00, -2.9413e+01, -1.1735e-01, -1.5127e+00, -7.9136e-01,
         2.4398e-01,  3.1992e-01, -2.8621e-01,  6.4542e-01,  7.0193e-01,
        -1.7681e-01,  9.5505e-01,  1.9007e+00, -3.2685e-01, -1.2984e-01,
        -1.5000e-01,  7.4414e-02,  1.7004e-01,  5.3946e-02,  3.6598e-01,
         1.1254e-01, -6.4444e-02,  1.4012e-01,  2.5484e-01, -4.7019e-01,
        -4.4395e-01, -4.6368e-02, -2.2593e-01, -2.4643e-01,  2.9297e-02,
         9.8626e-01,  1.7787e-01,  1.7199e-02, -2.0786e-01, -2.1240e-01,
         7.3952e-01,  1.9653e-01,  1.0802e-01, -1.9602e-01, -3.4916e-02,
         5.1519e-01,  2.6792e-01,  2.4563e-01, -2.8389e-01, -9.3673e-02,
         4.6982e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6957e-01,  2.2093e+01,  9.5547e-01, -4.7180e-01, -2.1039e+00,
         9.1770e-02, -2.3020e-04, -5.6337e-01,  4.5045e-02, -8.3853e-01,
         4.3921e-01,  5.2002e-01,  1.0100e-01, -3.4555e-01,  1.8140e-02,
        -1.2726e-01,  5.1777e-02,  4.5218e-02, -1.1835e-01, -1.1581e-01,
         7.6932e-01, -8.1339e-01,  5.7028e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.2329, -26.9734,   1.6162,  -3.9514,  -0.8199,  -1.6381,  -1.3283,
          1.3477,   0.4512,  -1.2879,   2.2039,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4406e+00,  2.5184e+01,  1.7013e+00,  4.3727e-01, -4.2604e-01,
        -4.1910e-01, -4.1638e-01, -7.3272e-02,  3.2750e-02,  1.3623e-03,
         5.7757e-01,  1.7120e-01, -7.1118e-01,  1.7600e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6367, 18.4876,  0.7337, -0.0595, -0.3916,  0.3342, -0.0750,  0.1920,
        -0.4949, -0.7539,  1.7991, -1.2092,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8982e-02,  1.9348e+01,  2.6311e+00,  1.2777e+00,  5.6456e-01,
        -4.0810e-01,  3.7975e-01,  3.3073e-01,  1.4629e-01, -1.8382e-01,
        -1.5869e-01, -3.7716e-02,  2.0170e-02, -1.5100e-01,  1.0119e-01,
        -1.8794e-01,  7.9511e-02, -1.3024e-01,  8.4935e-02, -3.4584e-01,
         4.0043e-03,  1.6626e-05,  7.8627e-02, -6.7525e-02, -7.0512e-02,
         5.3743e-01,  4.6755e-01,  4.7451e-03, -2.3910e-01,  1.5719e-03,
         1.4513e-01,  1.7442e-01, -1.3494e-01, -1.0139e-01,  1.2894e-01,
        -1.3704e-03,  2.7564e-02, -1.3351e-01,  8.3558e-02,  9.2220e-02,
         6.0590e-03, -7.2641e-02,  9.3437e-02, -1.3391e-01, -3.7035e-01,
        -1.4123e-01,  7.1071e-02, -1.8565e-01, -7.2045e-02, -1.8559e-01,
         1.6011e-01,  2.0690e-01,  8.8415e-02,  3.0001e-01,  4.3534e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 2.1085e+00,  7.4164e+01,  1.1274e+01,  1.4784e+00, -2.7362e+00,
         1.8474e+00, -1.7384e-01, -2.3265e+00,  8.9643e-01,  1.1424e-02,
        -1.5432e+00,  2.2482e-01, -9.7726e-02,  2.7860e+00, -3.0805e-01,
        -3.6183e-01, -3.0100e+00,  1.1482e+00,  3.0011e-01,  4.9789e-01,
        -1.0700e-01, -4.9056e+00, -5.0725e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5732, 24.0809,  1.2756,  1.5441,  1.5788,  0.0732, -0.1813,  0.2908,
        -0.3037,  1.1521,  0.2383, -0.7687, -1.1040, -0.0588,  0.2344,  0.5893,
        -0.6426,  0.2376,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6864,  6.4943,  1.5286, -0.2415, -0.8784,  0.0597,  1.1693,  0.1573,
        -0.1207,  0.0724, -0.0582,  0.1288, -0.1106,  0.0390, -0.0414, -0.6595,
        -0.3133, -0.7102,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5553,  7.6546,  0.2811,  0.2745, -0.1330, -0.1537, -0.2241, -0.4438,
        -0.7621, -0.0454, -0.2195,  0.2031,  0.1945,  0.2691, -1.4189,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6457e-01,  6.0017e+00,  1.7718e-01,  3.4665e-01,  3.7408e-02,
         2.3163e-01, -7.9519e-02,  5.9881e-02,  3.8860e-02, -1.7674e-02,
        -1.1412e-01, -1.6841e-02,  5.3677e-02, -7.8217e-02,  8.3801e-02,
         3.2923e-02,  2.5373e-02,  1.2062e-01, -3.1077e-02,  7.8664e-02,
         3.5420e-03, -8.6168e-02, -5.5794e-03,  4.9254e-02, -7.1537e-02,
        -1.5810e-02,  6.8927e-02,  8.3481e-03, -1.1803e-01,  2.0914e-01,
        -8.0001e-02, -4.9045e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7787e+00, -2.2244e+01, -1.8223e+00, -4.3453e-01, -1.7072e-01,
         3.9748e-02,  6.5960e-03, -1.5615e-01,  2.1702e-01, -1.2734e-01,
         3.8363e-01,  1.0035e-01, -1.1475e-01,  1.0940e-01,  2.2988e-01,
         1.3955e-01,  2.0873e-01,  8.0026e-01,  7.5395e-02,  1.7246e-01,
         1.0051e-01,  2.9369e-01, -5.3871e-04,  3.4762e-01, -6.8682e-02,
         3.3068e-01,  2.6913e-01, -7.3714e-01, -2.4186e-01,  5.5068e-01,
         1.4601e-01,  1.9321e-01, -1.1825e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7708e-01,  9.2027e+00,  6.4456e-01,  8.0919e-01,  5.9582e-01,
         1.8748e-01, -4.5664e-02, -9.4643e-02, -1.9105e-01,  6.8082e-02,
        -7.3986e-02, -2.7484e-01, -3.1257e-02, -5.6598e-02,  1.0099e-01,
        -7.1790e-02,  9.5529e-02,  7.8072e-02,  4.3950e-02, -1.6690e-01,
        -6.5676e-02, -1.0184e-01, -2.5887e-02, -1.6044e-01,  3.9741e-02,
        -4.6914e-02,  3.9477e-02,  7.1579e-03, -4.5629e-02, -1.4090e-01,
         9.0734e-03, -1.3826e-01, -4.7445e-02,  2.6282e-02,  4.3643e-02,
        -7.4457e-02, -2.7222e-02, -9.4869e-02,  6.6370e-03, -3.4561e-02,
        -8.5576e-03, -2.6085e-02, -7.3987e-02, -7.5589e-02,  4.1381e-02,
         9.3179e-03, -6.3612e-02,  3.7775e-02, -7.6286e-03,  1.1776e-01,
        -5.0175e-02, -8.0120e-03,  7.6639e-02,  1.6405e-02,  5.0645e-03,
         8.4773e-02,  1.1645e-01,  2.2352e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9435e+00,  2.1533e+01, -1.7761e-01,  1.7570e+00,  2.4130e-01,
         1.7316e-01,  1.4618e-01, -3.7498e-01, -1.5415e-02, -1.3235e-01,
        -3.6261e-01, -2.8267e-01,  9.4779e-01, -4.0276e-01, -3.0642e-01,
        -1.2234e-01,  1.1936e-01, -2.4922e-01, -2.7819e-01, -2.6212e-01,
        -6.9428e-01, -2.3168e-01, -7.6639e-01,  9.5422e-02, -1.6330e+00,
         1.9563e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4926,  8.5402, -0.7255, -0.0985, -0.7486,  0.5252, -0.8760,  0.1017,
        -1.6209, -0.0288, -0.0105, -0.0263,  0.0850, -0.0539, -0.6292, -0.1512,
        -0.0208,  0.0961, -0.3889,  0.0708, -0.2244, -0.0465,  0.0929,  3.0008,
         0.0253,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7013e-01,  1.2748e+01,  1.8347e+00,  1.1068e+00,  7.5174e-01,
         2.6391e-01,  3.4416e-02, -1.2970e-01,  1.2950e-01, -5.8568e-02,
        -4.2725e-01,  1.2808e-01,  9.3965e-02, -2.4523e-01,  2.1499e-01,
         1.3197e-01, -5.5251e-02, -1.9188e-01, -7.8683e-02, -2.2570e-01,
         2.9065e-03,  1.7289e-02, -1.6646e-01,  4.2680e-03, -7.3134e-02,
        -2.8031e-01,  9.0711e-02, -8.9934e-03, -3.1782e-02,  1.4517e-01,
         2.9301e-02, -1.3692e-01, -1.0519e-02,  1.4400e-02,  1.2159e-01,
        -1.2133e-01,  6.2214e-02,  1.6247e-02,  2.5761e-02,  6.9639e-02,
        -1.6127e-01, -1.1199e-01, -1.2437e-02,  2.2003e-01, -3.9568e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2188, 31.0655,  0.7130,  0.7372, -1.1984, -0.0412,  0.5582,  0.6384,
        -0.6972, -0.7509, -0.3143,  0.3089, -0.1420, -0.1806, -0.2131,  0.8722,
        -0.7932, -0.6982, -0.0803, -0.1087, -0.5693,  0.6395, -0.2001,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8782,  4.8189,  0.2229, -1.2813, -0.5205,  0.3262,  0.2939,  0.0695,
        -0.0584, -0.1623, -0.8017,  0.5434,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.3274e+00,  6.3867e+00,  3.7735e-01, -8.7036e-02, -7.6292e-01,
         1.0121e-01, -3.4608e-01,  2.8996e-03,  9.3721e-02, -2.0559e-01,
         2.5032e-01,  1.1382e-01,  1.8780e-02,  1.5824e-01,  1.1507e-01,
        -9.7719e-02, -1.5349e-02, -7.4980e-02, -7.1902e-02,  1.6622e-01,
        -2.9690e-01, -2.0443e-01, -6.6792e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3998e-01,  3.2848e+01,  2.5367e-01,  8.7545e-01,  6.3995e-01,
        -4.5061e-02, -1.5866e-01,  3.4735e-01,  4.2887e-01, -5.2385e-01,
        -8.3994e-02,  2.6688e-01, -2.8173e-01, -2.2151e-01, -6.2160e-01,
        -1.3242e-01, -2.9949e-02, -7.7756e-01,  1.3854e-01, -4.9111e-03,
         2.2886e-01,  1.9472e-01, -1.1800e+00, -1.6905e-01,  1.3444e-01,
         1.4756e-01,  1.0633e-01, -5.3888e-01,  2.3769e-01, -1.4853e-02,
         7.5380e-02, -5.4908e-01,  1.7801e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2618, 40.1173,  3.7189,  0.1130,  0.4098,  0.3883, -0.2319,  0.2684,
        -0.8897, -0.4104, -0.4584, -0.0587, -0.4387, -0.6250,  0.1093, -0.7251,
        -0.2023, -0.0981,  1.4631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6441, -9.8473,  0.1322, -0.8011,  0.5116, -0.1695, -0.0377,  0.2737,
         0.1055, -0.6281, -0.0393,  0.0706,  0.2173,  0.0518,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7277e+00, -3.6895e+01, -9.4527e-01,  7.0991e-02, -2.4992e+00,
        -1.8365e-01,  6.3761e-01,  6.6920e-01, -1.2813e+00, -8.4398e-02,
         1.1134e-01,  3.1773e-02, -8.6141e-02,  1.5933e+00, -2.6093e-01,
        -4.4402e-02,  6.2826e-01,  5.5244e-02, -1.2811e-01,  5.9882e-01,
        -1.2290e+00,  2.2164e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2283e-01,  1.2278e+01,  1.0666e+00,  9.9782e-01, -1.2469e-01,
         5.8889e-01,  7.2081e-02,  4.3569e-01, -1.1365e-01, -3.2399e-01,
        -1.6795e-01, -2.9897e-01, -2.0698e-02, -9.6255e-02, -1.1692e-01,
         2.3348e-03,  1.1493e-01, -2.8196e-01, -4.7908e-02,  1.0393e-01,
        -1.0417e-01, -2.2129e-01, -3.4106e-02, -3.1544e-01, -1.9520e-01,
         2.1700e-02,  5.6623e-02,  1.2919e-01, -1.4589e-01, -4.5320e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2027e-01,  9.0032e+00, -1.6272e-01, -2.5395e-01, -3.5542e-01,
         3.9813e-01,  4.3295e-03,  9.6287e-02, -2.9307e-01, -3.9060e-01,
        -1.3022e-01,  1.4961e-01, -2.6083e-01, -1.9411e-01, -3.8508e-02,
         6.5039e-02,  8.1900e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3269, 46.9853,  4.8101,  2.3490, -2.8299,  1.5824, -0.0764,  0.7132,
        -0.4178, -0.7670, -4.3476,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9430e-02,  5.0763e+01,  3.6310e+00, -1.7018e+00, -3.0643e+00,
        -1.3143e+00, -5.1268e-01, -4.6597e-01, -5.2047e-01,  1.8903e-01,
         7.8007e-01,  5.2927e-01, -1.8478e+00,  1.0868e-01,  1.7919e-01,
        -1.5674e+00,  1.7583e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9777e-01,  1.0813e+01,  8.0515e+00, -1.2506e+00, -2.2899e-01,
         6.0677e-02,  3.2797e-01, -3.5410e-01,  2.1130e-01, -1.5120e-01,
        -2.5754e-01,  5.3868e-01, -1.7771e-01,  7.0698e-02, -8.4847e-02,
         7.4599e-02, -1.1292e-01, -2.2632e-01, -4.3688e-01,  8.1231e-03,
         1.4453e-01,  9.5019e-02,  3.5004e-01, -1.0780e-02,  1.8246e-01,
         3.1786e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4759,  4.5537,  0.2964,  0.0133,  0.0796, -0.0151, -0.0527,  0.0174,
        -0.1345, -0.0154, -0.0420, -0.0701, -0.0492,  0.0251,  0.0120, -0.0432,
        -0.0485,  0.0347, -0.0357, -0.0226, -0.0502, -0.0635, -0.0332, -0.0518,
         0.2387, -0.0526, -0.0164,  0.0913, -0.0834, -0.1971,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.6308, -15.2104,   0.1022,   0.3150,  -0.1562,   0.2239,   0.1488,
         -0.2170,   0.1716,  -0.2109,   0.2314,  -0.0503,  -0.2896,  -0.1191,
          0.3705,   0.0759,   0.3738,  -0.0205,   0.8725,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-5.7443e+00,  5.5462e+01,  1.4211e+01,  2.8399e+00,  3.3663e+00,
        -5.8275e-01,  3.5746e+00,  1.8292e-01, -2.5585e-01,  2.9887e-01,
        -4.9885e-02, -7.5559e-01, -2.5734e-02,  1.1440e+00, -8.9459e-01,
        -8.2944e-01,  1.2122e+00,  7.0952e-01, -5.4115e-01,  1.1889e+00,
         2.4715e-01, -4.8712e-01,  3.3586e-01, -4.1150e-01, -9.6724e-01,
        -2.2721e-01,  1.5228e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  4.9798, -28.7224,  -2.1355,   0.8165,   1.6528,   0.7341,  -0.4562,
          0.7147,  -0.2044,  -0.2772,   0.5018,  -0.5369,  -2.7603,   0.3452,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0600e-01,  1.6138e+01,  4.1276e-01,  5.9315e-01,  2.3336e-01,
         4.2202e-01,  1.2507e-01,  3.2410e-01,  3.8424e-01, -8.2466e-02,
         9.9875e-01, -1.8367e-01, -1.5550e-01,  5.5711e-02, -1.5558e-01,
        -1.2586e-01, -8.6999e-03, -7.5118e-02, -4.8580e-01,  1.9392e-01,
        -2.0619e-01, -2.1931e-01, -5.1776e-01, -8.5246e-02,  5.3200e-01,
        -1.0122e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8965, 16.8931,  0.3332, -0.2897,  0.6415, -0.8520,  0.4629, -0.1110,
         0.6814, -0.5664,  1.2122,  0.5268,  0.6815, -1.3042,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3357e+00,  1.6024e+01,  5.5065e-01,  4.5782e-01, -1.2802e-01,
         5.7207e-01,  2.5449e-01,  2.8886e-01,  6.2683e-02, -1.4719e-01,
         9.3288e-02, -1.0300e-01,  1.1301e-01,  7.1925e-01,  6.2281e-02,
         9.3926e-03,  1.4302e-01, -5.0580e-02, -1.5564e-01, -6.2021e-02,
        -4.4741e-02,  4.2538e-02,  1.8677e-01, -6.4175e-03, -3.5463e-01,
        -1.5723e-01, -1.6600e-01,  4.3401e-02,  3.3205e-01,  2.5190e-01,
         1.0372e-01, -9.9039e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0614e-01,  9.0156e+00,  4.6138e-01,  8.3311e-01, -2.2780e-01,
         3.8068e-01, -3.1637e-01, -4.3639e-02, -4.1730e-02, -1.5605e-01,
        -1.0413e-01, -1.2683e-02, -9.7242e-02, -1.1395e-02, -1.9202e-01,
        -4.5390e-03, -1.1489e-01,  1.1631e-01, -5.7447e-02, -1.0426e-01,
        -6.7320e-02, -5.9139e-02,  1.3561e-02, -8.4135e-01, -1.5687e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0274,  5.1649,  0.9034,  0.3352,  0.1803, -0.3375,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1329e-01,  9.7450e+00,  4.5074e-01,  1.3334e-01,  2.8440e-01,
         4.5088e-02,  2.6391e-02,  2.3682e-02,  4.9502e-03,  8.1651e-02,
         5.0557e-02,  3.6152e-01,  1.7801e-01,  9.4270e-02,  6.0354e-02,
        -4.5081e-02, -2.5387e-01, -1.6216e-02, -1.3276e-02, -1.0795e-02,
        -3.8055e-02, -3.2652e-02,  7.0835e-02,  4.7329e-03, -5.1361e-02,
        -1.1939e-01,  5.0363e-02, -2.4209e-02, -5.4363e-02, -6.6881e-02,
         2.2623e-01,  3.3386e-02, -1.7319e-02,  8.1655e-03, -5.6903e-02,
         6.2541e-01, -2.6842e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0063e-01, -1.2208e+01, -6.7582e-01, -4.7457e-02, -5.3848e-01,
        -1.1288e-01, -1.9136e-01,  1.5449e-01, -7.0915e-02,  2.4804e-01,
         8.7407e-02,  9.2380e-03, -2.1413e-01,  5.3912e-01,  3.6067e-01,
         9.5681e-02,  4.5920e-01,  1.4640e-02,  4.4305e-02,  1.3471e-01,
         3.7285e-01, -5.9545e-02,  1.3141e-01,  5.6380e-02,  4.0954e-02,
         1.0607e-01, -1.5231e-01,  1.6388e-01,  3.2818e-01,  2.0150e-01,
        -8.5929e-03,  4.3987e-02, -2.3832e-01,  2.8902e-01, -1.0450e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7602, 35.8696, -0.2813,  0.5625, -0.6491,  1.0331,  1.1004,  2.4648,
         2.0425,  1.0473, -1.9116, -4.0888, -2.5640,  8.0982, -8.2375,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3238e+00, -2.8917e+01, -8.1094e-01,  1.2720e+00,  5.5891e-01,
        -5.2081e-01,  1.3440e-01,  3.4763e-01, -1.2246e-01,  2.9150e-01,
        -8.3640e-02,  4.2023e-01,  2.6455e-01,  9.3356e-04, -1.5380e-01,
        -2.5755e-01,  9.1565e-02, -2.4817e-01,  3.0376e-01,  6.6003e-01,
         2.0714e-01, -3.0442e-02,  2.7108e-01,  3.9391e-02,  3.4413e-01,
         1.9600e-01,  4.7287e-01,  2.0806e-01,  2.0593e-01, -1.5729e-01,
         1.9308e-01,  1.6664e+00,  1.1989e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8146e+01,  9.0086e+01,  7.2415e+00, -5.8557e-01, -1.6158e+00,
        -3.7022e+00, -4.6400e-01, -6.2698e-02, -1.2900e+00,  3.3130e-01,
        -8.8778e-02,  4.8013e-01, -8.2991e-01, -1.4182e+00, -1.1430e+00,
        -5.0591e-01, -6.8149e-01, -6.0904e-02, -4.2487e+00,  1.0865e+00,
        -1.5244e+00, -1.2170e+00, -1.2822e+00,  4.7876e-01,  1.3929e-01,
        -4.1735e-01, -2.3510e+00, -6.5323e+00, -2.8378e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-6.9632e-03,  8.3801e+00,  3.5886e-01, -5.3382e-01, -3.7173e-01,
        -1.1626e-01,  3.0772e-01,  1.8011e-01, -1.0982e-01, -1.7923e-02,
         1.3320e-01, -1.2993e-02, -1.1661e-02, -2.3467e-01, -8.0855e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0019e-01,  1.5335e+01, -5.8511e-01,  1.6568e-01,  4.2209e-01,
         1.1928e-01, -5.2531e-02,  2.0258e-01, -1.6947e-01, -6.7660e-03,
        -3.0867e-01,  5.5410e-02, -5.6784e-02, -1.4017e-01,  6.9121e-02,
         1.8051e-01,  8.2846e-02,  1.3871e-01,  6.3064e-02,  1.0949e-01,
         2.2311e-01, -7.1750e-04,  5.5467e-02, -2.0703e-02, -5.9264e-02,
        -4.3609e-03, -1.4982e-02,  8.5502e-02,  8.2718e-02,  9.4954e-02,
        -5.8100e-02, -7.5307e-03, -5.7709e-02,  5.3660e-02, -9.9017e-02,
        -5.1389e-02,  8.7139e-02, -1.3371e-02,  3.4627e-02, -4.4255e-02,
        -1.4920e-03,  4.8923e-02, -5.5818e-02, -3.1506e-02,  1.0956e-02,
        -1.3732e-01,  1.3863e-01,  1.8919e-01, -1.1235e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5040, 27.6663, -4.3025, -0.8507,  0.7968, -0.4129, -0.1703,  0.8998,
         1.3692, -1.8829,  0.2774, -0.0439,  0.0369, -0.0535, -0.2549, -0.2745,
        -0.9287,  0.0650, -0.3595, -0.1005,  0.5419, -0.4027, -0.3315,  0.1353,
         0.2092,  0.1827, -0.0650,  0.6909,  0.0398, -0.1061,  0.1047,  0.0794,
         0.0322,  3.4822, -0.2069,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.5226, -45.1106,  -1.7544,   0.1082,  -0.2322,   0.5830,   0.6273,
         -0.0661,   0.1093,   0.3148,   0.7322,   0.6585,   0.3524,   0.2775,
         -0.3009,   0.3924,  -0.4148,   1.1113,   2.6504,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4289e-02, -2.7932e+01,  1.8862e+00, -3.2445e-01,  8.2317e-02,
        -2.7717e-01, -7.1246e-01,  2.5634e-04, -1.7146e-01,  1.9548e-01,
         3.0496e-01, -3.8998e-01, -1.7820e+00, -9.3640e-02,  1.4720e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4966e-02,  9.3429e+00, -7.8656e-01, -1.6913e-01,  1.1565e-01,
        -2.5362e-01, -3.9076e-01, -1.1711e-01,  5.5032e-01,  1.0106e-02,
        -8.6368e-02,  7.7671e-02, -3.9585e-03,  4.5862e-03, -2.2749e-01,
         1.1137e-01, -3.1098e-02,  7.2327e-02,  1.2166e-01,  2.7544e-01,
        -2.8696e-02, -8.2786e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9386e-01,  4.2501e+00,  2.1949e-01, -1.2158e-01, -1.0366e-02,
        -7.2062e-02, -5.7029e-02,  7.6975e-02, -2.0967e-02,  7.0064e-02,
         3.4514e-02,  2.1749e-01,  1.1072e-02,  1.0835e-01,  3.7387e-03,
         4.5347e-02, -2.4322e-02, -2.2437e-02, -8.7176e-03, -2.2652e-02,
         1.7526e-02,  2.3488e-02, -2.3504e-02, -2.9970e-02,  1.9995e-01,
         7.5381e-03, -6.4758e-02, -4.0400e-02,  6.3608e-02,  2.1064e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0992,  2.3298, -0.2008, -0.1010,  0.3974,  0.1488,  0.0335,  0.1183,
        -0.0159,  0.4550, -0.1111,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2797e-01,  1.2360e+01, -1.0897e-03, -1.5241e-01,  4.7068e-02,
        -1.3887e-01,  1.7838e-02,  1.0448e-01, -4.6724e-02,  6.5720e-02,
        -5.9874e-01, -6.1736e-02, -9.6806e-02,  1.3982e-01, -2.6013e-02,
         1.2058e-01, -1.4359e-01,  6.7018e-03,  5.3707e-02, -2.0279e-01,
         6.5392e-01, -9.9707e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4807e+00, -4.4333e+01, -2.9389e+00, -1.4758e+00,  1.3494e-01,
         6.7347e-01, -3.1856e-03,  3.7624e-02,  2.0672e-01,  9.9611e-01,
         1.8473e-01, -1.2156e+00,  9.0136e-02,  1.3528e+00,  1.2691e-01,
         1.0342e+00, -1.9468e-01,  3.5302e-01, -5.9800e-01,  3.8552e-01,
        -1.0114e-01,  4.4621e-01,  7.9445e-01,  6.1497e-01,  3.4645e-02,
         7.2164e-02,  3.8339e-01, -1.8079e-01,  4.6078e-01, -2.3942e-01,
         8.1800e-02, -1.3683e-01, -3.9767e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7753e-01,  3.5874e+01,  3.0362e+00,  9.0513e-02,  1.7262e-01,
         1.0936e+00,  1.9352e-01,  3.8566e-01,  3.9191e-01, -4.7577e-01,
        -8.5200e-01,  4.5807e-01, -1.6785e-01,  7.1846e-01,  2.2883e-02,
        -2.8547e-01,  2.0788e-01, -2.4484e-01, -4.0567e-01, -8.7510e-01,
        -8.8916e-01, -5.3495e-01,  3.6183e-01, -1.8550e-01,  3.0564e-01,
         6.4785e-01,  8.3734e-01, -2.4991e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3988e-01, -6.3030e+00, -8.3636e-01, -1.1738e-01,  2.6689e-02,
         3.7340e-02,  2.9173e-02, -7.3394e-02,  1.4332e-02,  9.8285e-03,
         8.7286e-03,  2.5190e-02,  9.0464e-02,  1.2539e-01,  1.1517e-01,
         1.0196e-01,  1.3817e-01,  7.9559e-02,  7.0543e-02,  6.1920e-03,
         6.0915e-02, -1.4205e-02,  6.0740e-02, -9.0472e-02, -4.1102e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.0639e-01,  9.6433e+00,  1.1089e+00, -3.6336e-01,  2.4280e-01,
        -2.0495e-01, -3.2690e-01, -3.2146e-01, -1.1878e-01, -7.1128e-02,
         1.2464e-02, -9.2770e-02, -1.0522e-01,  6.7494e-02,  1.1638e-01,
        -2.5219e-01, -1.8801e-01, -1.1739e-01, -5.7546e-02, -1.1847e-01,
         6.1182e-03, -7.3662e-02, -2.7618e-01,  2.2015e-01,  4.9942e-01,
         1.2559e-01,  1.6171e-01, -2.8962e-02,  1.5563e-02, -1.0609e-03,
         5.9502e-02, -1.2952e-01, -2.2761e-01, -8.4371e-02,  7.9203e-02,
        -1.5949e-01, -7.9706e-02, -1.8054e-02, -2.1415e-01,  3.8325e-01,
        -3.1025e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0116e+00,  3.0161e+01, -4.2522e-01, -5.0723e-01,  1.1619e+00,
        -1.3762e+00, -8.1285e-01, -6.9098e-02, -2.4040e-01,  1.4365e-01,
        -5.1867e-02, -9.0656e-01,  6.4656e-01, -6.4311e-02,  7.1507e-01,
        -2.2922e-01, -2.1615e-01, -1.8359e-01, -3.2279e-01, -9.3837e-01,
        -1.5228e-02, -9.6332e-02, -1.8241e-01, -2.1231e+00, -1.2639e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1413e-01,  5.1430e+00,  1.4510e-01, -5.0851e-01, -7.3340e-02,
         1.7208e-01, -1.3137e-01, -3.1821e-02, -3.6194e-02, -1.1529e-01,
         7.0716e-03,  6.7545e-02, -4.5429e-02,  2.0703e-03,  2.2680e-02,
        -1.4955e-02,  2.0415e-02,  6.3094e-02, -9.9602e-02, -4.8081e-03,
        -5.2782e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4665, 23.6159,  1.1364, -0.0840, -0.3585, -0.1356, -0.1332, -1.3591,
        -0.4017,  0.8107,  0.7886, -0.1516, -0.0307, -0.4760, -0.3359,  0.2796,
        -1.8392, -0.7985,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7590e-01,  4.4223e+00,  1.7725e-01,  4.1797e-01,  2.0023e-01,
        -1.0569e-01, -3.6417e-03, -1.0624e-01,  9.3768e-02,  6.3987e-02,
        -1.0753e-01, -5.3732e-03, -8.2141e-02, -1.2645e-01, -7.5791e-02,
         3.6444e-02, -1.5717e-01,  9.5027e-02, -1.3683e-01, -9.1451e-02,
        -8.9520e-02, -7.4999e-02, -3.4303e-02, -8.4709e-02,  1.6272e-02,
         6.3800e-02,  5.5100e-02,  3.3047e-02, -3.9420e-02,  3.2607e-02,
         1.9970e-02,  1.4136e-02, -8.5455e-03, -2.6516e-02, -6.1649e-02,
         1.3141e-02,  1.0760e-01,  1.1314e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9745,  7.3224, -0.5724,  0.2219, -0.1216, -0.0194, -0.0826, -0.0213,
        -0.0971,  0.0144, -0.0411, -0.0667,  0.0232, -0.2322,  0.0091,  0.0627,
         0.0380, -0.4610, -0.5909, -0.9942,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1323e-01,  2.9261e+01,  2.2223e+00, -2.8005e-02,  8.6279e-01,
         2.6695e+00, -4.1800e-01, -8.7809e-03,  7.0147e-01,  1.0077e+00,
         5.9740e-01, -6.6958e-01, -3.9952e-01,  7.5479e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1451,  1.2182, -0.1559, -0.0438,  0.0181,  0.0325,  0.0278,  0.0120,
         0.0223,  0.0274,  0.0094,  0.0184,  0.0234, -0.0459, -0.0072, -0.0228,
         0.0126,  0.0025,  0.0098,  0.0039,  0.0105,  0.0469,  0.0095,  0.0530,
        -0.0026,  0.0361,  0.0100,  0.0117,  0.0105,  0.0434, -0.0186,  0.0422,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.4238, -31.9935,  -1.0392,   0.5748,  -0.0795,   0.9601,  -0.0428,
         -0.1822,   0.2688,   1.4208,   0.5407,   0.6430,   0.2219,  -0.5931,
         -0.3520,   0.0809,   0.3868,   0.6218,   0.1675,   2.0898,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1354,  1.9834,  0.1816, -0.0791, -0.0233,  0.0491, -0.0383, -0.0243,
        -0.0246,  0.0265, -0.0151, -0.0206, -0.0249, -0.0293, -0.0260, -0.0266,
        -0.0186, -0.0152,  0.0076,  0.0060, -0.0197, -0.0391, -0.0378, -0.0150,
        -0.0030, -0.0160, -0.0509, -0.0424, -0.0248, -0.0270,  0.0079, -0.0083,
         0.0055,  0.0095,  0.0078,  0.0102, -0.0117, -0.0430, -0.0214,  0.0167,
         0.0225,  0.0557,  0.0361, -0.0033, -0.0728,  0.0066, -0.0033,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6275e-01,  1.2118e+01, -2.0411e-01,  2.0031e-01,  1.1725e-01,
         2.5920e-01, -6.4192e-03, -2.5272e-01, -3.9746e-01,  2.2920e-01,
        -4.2626e-01,  3.2042e-01, -2.2607e-01, -3.1183e-01,  9.6969e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2776e+00,  1.2073e+01,  1.1334e+00,  1.0455e-01, -2.2251e-01,
        -2.6106e-02,  1.1069e-02,  7.4667e-02,  1.3821e-01,  3.9065e-03,
        -2.2272e-01, -9.3701e-02, -1.2517e-01,  2.6683e-01,  1.7230e-01,
         5.2195e-02,  5.9436e-02,  7.1760e-02,  1.9501e-01, -1.8817e-01,
         2.6209e-03, -1.9618e-01, -7.4200e-02,  2.3754e-01,  5.5015e-02,
        -4.9107e-01,  2.0749e-01,  2.3863e-01, -1.5703e-01,  8.0999e-02,
        -3.9026e-02, -6.4302e-03,  1.4654e-01, -7.4974e-02,  8.1288e-02,
         1.6117e-01, -4.2942e-03, -5.3229e-02, -2.5020e-01, -2.7358e-04,
        -5.0096e-03,  1.9790e-02, -4.2477e-02, -2.6667e-02, -2.1754e-02,
         1.0913e-02,  2.2562e-01,  3.5735e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-1.7254, 20.4409,  0.9838, -0.0504,  0.5106,  0.1840,  0.4534,  0.1860,
        -0.9355,  0.0603,  0.0582,  0.1880, -0.1385, -0.2796,  0.2110, -0.1104,
        -0.5560, -0.2469, -0.3852,  0.1730,  1.1618,  0.1073,  0.2523,  0.6859,
         2.0221, -0.1237,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1155e-01, -3.3344e+01, -1.2662e+00, -6.0566e-02,  2.9852e-01,
        -5.1669e-01, -1.3735e+00, -1.4737e+00, -3.0337e-02,  1.9603e-01,
        -1.0422e-01, -3.1066e-01,  1.4257e-01,  5.3630e-01,  3.8188e-01,
         3.3561e-01,  5.2403e-01,  6.4687e-01,  1.5152e+00, -4.4127e-01,
        -8.9482e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3025e-01,  2.4013e+01,  3.7405e+00, -4.4946e-02,  5.4386e-01,
        -6.7411e-01, -3.8932e-01, -5.4128e-02, -4.1186e-01, -1.2973e-01,
         1.5455e-01,  1.8258e-01, -7.5132e-01, -4.6972e-01,  5.6374e-02,
        -1.3413e-02,  8.8979e-02, -5.9915e-01,  2.5108e-01, -5.0045e-01,
        -2.2088e-01, -2.4016e-01,  2.1462e-01, -2.2878e-01,  6.8934e-01,
        -1.8141e-01,  1.2545e-01, -9.0015e-03, -1.4280e-01,  6.0919e-01,
        -2.9510e-01, -9.4040e-02,  1.0983e-01,  1.9040e-01, -2.5769e-01,
         3.4150e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0691e-02,  9.8142e+00,  3.5751e-01,  5.8349e-02,  2.3098e-01,
         1.3852e-01, -4.0693e-01, -7.2403e-02, -3.1248e-01,  3.5474e-01,
         1.9809e-01,  6.1968e-02,  3.6566e-03, -4.3491e-02, -8.6366e-02,
         4.2249e-01, -2.3052e-01,  2.7833e-01,  2.2304e-01, -4.8637e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3988e+00,  1.4266e+01,  2.9685e-01,  2.6116e-01,  2.3202e-01,
        -2.3364e-01, -2.4904e-01, -4.5901e-01, -7.7246e-01,  7.8030e-02,
         3.6209e-01,  2.8101e-01, -3.0250e-01, -1.5679e-01, -7.8054e-03,
         1.3272e-01, -2.3142e-01, -1.3980e-01,  1.1180e-01, -1.9267e-01,
        -1.1978e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3571, 14.3134,  1.1931, -0.1099,  0.0563, -0.1368, -0.4807, -1.1669,
         0.1241,  0.6216,  0.5881,  0.5511,  1.3460,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0112e-02,  6.0330e+00, -1.2449e-01,  1.4870e-01,  4.5696e-02,
         1.2849e-01,  1.0694e-03,  5.9500e-02,  4.5687e-02,  3.6473e-02,
         1.1978e-01, -6.2553e-02, -5.8863e-02, -8.2430e-02,  9.3428e-02,
         1.4208e-01,  3.3676e-02, -1.1079e-02,  9.5456e-03,  1.9837e-02,
         9.9856e-02, -5.9285e-02,  3.9739e-02,  1.5377e-01,  3.7675e-02,
         6.7829e-02, -1.4881e-01, -7.7061e-02,  3.7566e-02,  6.5885e-02,
         2.0753e-02,  2.8459e-02,  1.9854e-02, -4.6048e-02,  8.9981e-02,
        -4.9370e-03,  2.2493e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3849, 24.5843,  0.0814,  0.0799, -1.3494, -0.6915, -0.2238,  0.0796,
         0.4230, -1.1201, -0.1543, -0.6499,  0.4669, -0.4473, -0.1793, -0.3246,
         0.4660,  0.0974,  0.2014, -0.1087, -0.0689, -0.2796,  0.1620,  0.5314,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1685, 17.5794,  3.8054, -0.1679,  0.3668,  0.2503, -0.2008, -0.4719,
        -0.3094,  0.0371, -0.1209,  0.8339,  0.3059,  0.0397, -0.6701, -0.2244,
        -0.1354, -0.2023, -0.2190, -0.1892, -0.1781,  0.7084, -0.1429, -0.2258,
         0.0804, -0.0254, -0.0449,  0.0468, -0.0293,  0.3229, -0.1713, -1.7130,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5522e-01,  2.1656e+01, -1.1585e-01,  3.8083e-02,  1.0464e+00,
         3.1504e-01, -3.4832e-01, -4.5540e-01,  4.0542e-01, -8.9664e-01,
         5.2284e-02, -1.2711e-01, -2.7239e-01, -1.8462e-02, -8.3659e-02,
         2.9283e-01, -2.4855e-03, -1.7352e-02,  4.9480e-01,  4.1670e-01,
        -4.9199e-01,  1.8103e-01, -2.4953e-01, -5.7171e-01,  2.9844e-02,
         2.8559e-01, -5.3687e-01, -1.1512e-01, -6.3143e-02,  1.2835e-01,
        -2.5630e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4425e-02,  1.6292e+00, -6.6050e-02, -1.5597e-02,  7.2863e-02,
         4.9869e-02, -1.2234e-02,  5.8806e-03,  3.7453e-02,  5.2549e-03,
        -1.2168e-03,  3.6364e-02, -2.3953e-02, -1.7047e-02,  4.3829e-03,
        -1.6989e-02, -9.5147e-03,  2.4870e-02,  6.2105e-03, -4.8842e-03,
         3.9935e-02, -7.9469e-02,  8.0600e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.1564, -27.1168,  -1.1095,   1.3128,  -0.3801,   0.9686,   0.1980,
          0.7143,   0.5904,   0.8142,   0.4395,   1.5559,   0.4376,   0.6099,
          0.4781,   0.5681,   0.7933,   0.2052,   0.6278,   0.0454,   0.7506,
          0.3239,  -0.3882,  -0.1378,   0.1895,  -0.2395,   2.1718,   0.2365,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-9.3299e-01,  1.8263e+01,  7.0929e-02,  4.0220e-02, -1.8046e-01,
         1.1053e-02,  1.1204e-01,  2.6008e-01,  1.6424e-01, -8.9637e-01,
        -2.9495e-01, -3.6927e-01, -1.6512e-01, -4.9353e-02, -1.6940e-01,
        -1.1261e-01, -1.0359e-01, -5.4891e-02, -1.8874e-01,  2.6403e-02,
         2.6335e-01,  1.7427e-01, -1.8105e-01,  1.1518e-01, -1.4460e-01,
         1.0629e-01,  9.5893e-01, -1.0879e+00, -1.0779e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3709e-01, -4.9376e+01, -9.6653e-01,  1.8972e-01, -8.9751e-01,
        -1.8036e+00, -9.4581e-01, -6.0847e-01,  2.9380e-01,  5.3786e-01,
        -6.9350e-01, -2.3821e-01,  1.0323e+00,  5.2701e-01, -3.5577e-01,
         3.5059e-01, -5.0294e-01, -5.9213e-01,  4.1683e-01,  1.4856e+00,
        -3.6191e-01,  3.3080e-02,  5.2133e-01, -2.2576e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0218,  2.0826,  0.2144,  0.1595,  0.0446,  0.0026,  0.0047, -0.0611,
         0.0734, -0.0279,  0.0310,  0.0796, -0.0595,  0.0342, -0.1057,  0.0214,
         0.0065, -0.0029,  0.0100, -0.0272,  0.0282,  0.0111,  0.0025,  0.0207,
         0.0445,  0.0186, -0.0470,  0.0160, -0.0095,  0.0210, -0.0194, -0.0783,
         0.0311, -0.3393], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3812e-01,  3.5677e+01,  7.6223e-01, -3.8426e-01, -1.2422e-01,
        -5.3792e-02,  1.5385e+00,  5.5946e-01,  5.6379e-01,  1.9678e+00,
        -2.0525e-01, -4.4559e-01, -1.4528e-01, -3.7207e-01,  3.3442e-03,
        -6.4726e-02,  3.4273e-01, -4.0741e-01,  3.0852e-01,  7.8473e-01,
        -1.5199e-01,  4.2986e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.1825, -38.9991,  -1.7227,   1.2710,  -1.6241,   1.5160,  -0.3933,
         -0.2805,   0.1108,   0.0444,  -0.4088,   0.3772,   0.9211,   0.1137,
         -0.3713,   0.3746,   1.0453,   0.8505,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1297e+00,  2.4923e+01,  3.8934e-01, -7.2635e-01,  1.0995e+00,
        -4.5435e-02,  2.8821e-01,  7.6786e-01,  5.5927e-03, -7.3392e-01,
         2.0636e-01, -1.1851e-01, -1.9267e-01,  1.5667e-02, -2.7469e-01,
        -6.0078e-01, -7.0440e-02, -2.4670e-01, -8.4688e-01, -4.7713e-02,
        -5.7146e-01, -1.4298e+00, -5.2891e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7844e-01,  4.4933e+01,  2.4833e+00,  1.7042e+00,  2.8503e-01,
        -1.0904e-01,  9.1983e-02, -4.5262e-01,  1.0176e-01,  9.4744e-02,
         1.0197e-01, -5.1988e-01,  2.6490e-02, -7.4109e-01, -5.0106e-01,
        -4.0770e+00, -1.1409e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5984e+00, -3.6362e+01, -1.0000e+00,  1.0058e-01,  1.8844e-01,
        -3.0326e-01,  1.0030e+00,  1.0616e+00,  5.3180e-01,  9.4264e-01,
         7.5684e-01,  4.8651e-01,  3.0000e-01,  1.4236e-02, -4.4007e-01,
         1.9376e-01, -1.5428e+00,  2.3343e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0602, 15.7791,  0.5558, -0.3488, -0.0258,  0.1457,  0.0220,  0.0529,
        -0.2865,  1.0951, -0.0343,  0.2029,  0.0251,  0.1054, -0.1587, -0.0225,
        -0.1672, -0.0312,  0.4919, -0.2333, -0.0815, -0.1804, -0.2143,  0.2211,
         0.1461,  0.7798,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1145e+00,  9.0621e+00,  1.1396e+00,  1.0559e-01,  3.3847e-01,
         6.9082e-02,  6.2725e-02,  4.0220e-02, -2.4894e-01, -2.1837e-02,
        -1.1821e-01, -1.8838e-01, -2.8297e-02, -4.8917e-02, -2.0626e-01,
        -9.8757e-02, -1.4645e-01, -1.7471e-01, -7.0002e-02, -3.0029e-01,
         2.4993e-02, -1.0855e-01, -1.4528e-01, -1.7388e-01, -1.5378e-01,
         3.9464e-02,  4.9152e-02, -5.6499e-03,  1.2498e-01,  3.4153e-01,
        -1.7576e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4750e-01,  3.8820e+01,  8.2140e-01, -1.5552e-02,  1.6499e+00,
         1.2917e+00,  9.3760e-01, -1.8031e-01, -4.1485e-01,  4.7743e-01,
         9.9715e-01, -5.1559e-01, -8.6029e-02, -4.9350e-01,  4.1771e-02,
        -1.2708e+00, -6.2355e-01, -1.2645e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6427, 24.2018, -0.9893, -0.3510,  0.2790, -0.4146,  1.1096, -1.3043,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 3.2353e-01,  3.8635e+00,  2.8897e-01, -1.1314e-01,  9.2348e-02,
         9.2549e-02, -2.3817e-02,  1.5987e-02, -5.2712e-02, -2.3385e-02,
         4.4055e-03, -3.9271e-02,  6.5466e-03,  5.2838e-03, -2.7176e-02,
         8.6736e-02,  2.3009e-02,  2.6959e-03, -3.1249e-02, -8.5612e-02,
         5.7145e-03, -4.6971e-02,  5.9915e-04, -1.9266e-03, -1.9551e-02,
        -1.2084e-02, -5.8484e-02,  4.1232e-02, -1.2554e-02,  3.9665e-03,
        -4.7544e-02,  9.8922e-03,  4.1524e-02,  1.5335e-02,  2.7638e-02,
         1.1436e-02,  9.1600e-03,  7.5076e-02,  1.2540e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5018, 30.4742,  1.2355, -0.2329, -1.2578, -1.2565, -0.6194, -0.0954,
        -0.6003,  0.1088,  0.1905,  0.4154,  0.9413, -0.6020,  0.5056,  0.0983,
        -0.1635, -0.5411, -0.3394,  0.0768, -0.8685, -0.7841, -0.3867, -0.2268,
         0.1209,  0.0810, -2.2863,  0.7857,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6595e+00, -1.4940e+01,  3.5940e-01, -4.9326e-01, -4.2003e-02,
         3.9574e-01,  2.8307e-01, -6.0137e-02, -1.3269e-02, -2.3588e-01,
         1.3330e+00,  3.7570e-02,  8.7361e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7776, 33.7690, -1.6266, -1.1531,  0.6883, -1.6110, -0.2471, -0.3079,
         0.4631, -1.2195,  0.4007, -1.9101,  0.5644,  2.0495,  0.3854,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8573e-01,  4.3364e+00,  7.9300e-02,  1.7043e-01,  7.0604e-02,
        -9.8496e-02,  3.7787e-02, -1.6146e-01, -3.3187e-02, -9.5482e-02,
        -1.6110e-02, -3.1524e-02, -1.1968e-01,  1.2943e-01,  3.8305e-02,
         3.3178e-02, -1.9048e-02, -7.3447e-03,  2.6274e-02, -2.5686e-02,
         6.5829e-02, -1.9247e-02,  3.2638e-02, -7.5043e-03,  4.5445e-03,
        -2.8635e-02, -4.9966e-02,  2.0262e-02, -3.8868e-03,  5.3575e-02,
        -2.1516e-02,  2.1978e-02,  1.7660e-02, -5.4768e-02,  9.7277e-03,
        -1.6661e-01, -2.1795e-02,  1.4909e-02, -5.9690e-02, -1.8810e-02,
         2.0379e-02, -4.1039e-02,  4.4130e-02,  1.2724e-02,  2.1972e-02,
         9.0484e-02,  7.8467e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9576e-02,  5.3746e+01,  2.9933e+00,  1.2158e+00,  1.1255e+00,
        -7.6213e-01,  4.5562e-01,  7.6397e-01,  3.0531e-01,  3.5565e-01,
         2.7270e-01, -1.2394e+00, -3.7651e-01, -7.6053e-01, -7.4574e-02,
        -1.6931e-01,  3.2647e-01, -1.2346e+00, -5.9727e-01, -1.2290e-01,
         3.2471e-02, -1.3169e-01,  2.8002e-01, -7.4112e-01, -1.5644e-01,
        -5.3616e-01, -1.2970e+00, -2.6479e-02, -3.8720e-01, -1.5116e-01,
        -5.5297e-01, -3.3035e-01, -9.6877e-02, -5.2351e-02,  2.0694e-01,
        -4.8246e-01, -3.5146e-01, -1.5430e-01, -4.4712e-01, -1.0032e-01,
         2.6000e-01,  1.6871e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1356e+00,  1.6474e+01,  1.0892e+00, -4.6353e-01,  5.5795e-01,
         3.7006e-01,  1.2961e+00,  3.2373e-01, -2.0130e-01,  5.4840e-01,
         4.1912e-01, -1.0620e-01, -6.0623e-03,  1.1758e-01, -1.1145e-01,
         2.8309e-01,  6.1990e-01,  2.5069e-02, -7.0570e-02,  1.9238e-01,
         2.2676e-02, -1.1610e-01,  9.5183e-02, -5.7728e-01,  5.3603e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3513e-01,  3.8087e+01,  4.9617e+00, -6.3814e-01, -1.8827e-01,
        -6.2136e-01, -3.1272e+00, -6.6433e-01,  1.0987e-01,  3.1926e-01,
         1.2060e-01, -2.4110e-01, -2.1469e-02, -4.8084e-01, -1.2418e+00,
        -5.3540e-01, -2.9977e-01,  3.5516e-02, -4.0999e-01,  3.4290e-01,
        -1.3947e-01,  3.6006e-01,  1.2716e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1773e-02,  2.1409e+01, -3.5327e-01,  1.9809e-01, -1.2134e+00,
         1.4942e-01,  7.5544e-02, -1.2912e-01, -2.7783e-01,  1.6445e-02,
        -3.3444e-02, -6.1546e-01, -5.7430e-01, -9.8140e-02, -1.0536e-01,
         3.0058e-02, -5.8460e-01, -2.0168e-01, -1.1348e-01, -2.2134e-01,
         9.5486e-02, -2.0229e-01, -6.0371e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8601, 20.5682, -2.2119,  0.0772, -0.6532, -0.0578,  0.5460, -0.9949,
         0.4995, -0.5025,  1.4323, -0.5367,  0.2613,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8194e-01,  4.1539e+01, -2.9975e+00,  3.4593e+00, -1.7087e+00,
         1.8960e+00,  6.3504e-01,  5.3388e+00,  2.2309e-02,  1.0556e+00,
        -1.2307e+00,  2.1739e+00,  1.5129e+00, -3.1433e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3186, 39.9693,  3.6551, -1.1060,  0.6083, -0.7024, -0.4717,  0.7001,
        -0.4237, -0.7914, -0.3715, -0.3356,  0.1057, -1.6740,  0.3992,  0.3045,
        -0.1715, -0.5680,  1.8128,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.7052e+00,  4.5484e+00,  1.3965e-01, -1.7232e+00, -1.7034e-01,
        -1.0713e-01, -1.8953e-02, -9.9638e-03, -2.4812e-01,  1.6327e-04,
         6.3996e-02, -4.8061e-03, -1.1913e-03,  8.0994e-03, -5.7457e-03,
         1.5499e-01,  1.9054e-03, -2.0695e-01, -3.0606e-02,  6.3393e-02,
        -2.6701e-01, -1.3134e-02,  1.8535e-02,  7.1199e-03, -6.3283e-01,
        -9.9468e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2821e+00, -3.5648e+01, -2.2645e-02,  4.1279e-02, -1.3424e+00,
         1.2223e+00, -4.3817e-02,  9.7478e-01, -2.6709e-01,  6.3474e-01,
        -6.5300e-01,  6.5416e-02, -8.8854e-01, -8.6093e-02,  1.4710e+00,
        -4.6795e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7820e-01,  3.9969e+00, -2.1056e-01,  1.2497e-01, -2.7486e-02,
         1.4085e-01, -7.1225e-02,  1.1067e-01, -2.9327e-01, -3.2170e-02,
        -1.5152e-03, -1.3679e-02, -6.9259e-02,  1.2070e-01,  1.4029e-01,
         4.6376e-02,  1.8078e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3904e-04,  1.8448e+01, -3.3021e-01, -4.7104e-01,  2.6947e-01,
        -3.6391e-01, -9.6565e-01,  5.1955e-01, -1.4004e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0559, -19.1184,  -4.1184,   0.1480,  -0.7670,  -0.3703,  -0.7271,
          0.0222,  -0.0946,  -0.4565,  -0.1638,  -0.1645,   0.0838,   0.4195,
          0.2123,  -0.2678,   0.3502,  -0.1946,   0.3486,  -0.0365,  -0.1103,
          0.0859,   0.8456,   0.5189,   0.4027,   0.0815,   0.0734,   0.2941,
          0.3643,   0.3455,  -0.0419,   0.1552,   0.1704,   0.2455,  -0.1773,
         -0.1800,  -0.0905,  -0.3745,   0.2950,   0.1651,   0.2042,   0.0871,
          0.7403,   0.4954,  -0.0273,  -5.2592], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0963e-01,  4.0562e+00,  1.1441e-04,  4.2465e-02,  1.1586e-01,
        -2.8523e-02,  1.8027e-01, -1.0096e-01, -9.5668e-02,  5.0103e-02,
        -2.8714e-02, -4.5009e-02,  1.3843e-01, -1.8252e-02, -1.6249e-02,
        -6.2513e-02, -1.0053e-02, -3.5344e-02,  1.5183e-02, -1.1763e-02,
         6.2369e-03,  1.3214e-02, -1.8286e-03, -4.9954e-02,  4.8039e-03,
        -7.6995e-03, -3.3997e-02,  1.3294e-02, -1.1452e-01, -2.1020e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2829, 56.0686,  1.9067,  3.5630,  0.3506,  0.8556,  3.2113,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7929, 30.3650, -1.4222, -1.7689,  1.0688,  1.1250, -0.8493,  1.9554,
        -1.2485,  3.7769, -0.4074,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3798,  9.7503,  0.3327,  0.3380,  0.3097,  0.5248, -0.1244, -0.1566,
         0.2348,  0.0899, -0.2446, -0.1033, -1.2984, -0.4179,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8217, 47.9191,  2.7757,  0.9058,  1.6096,  0.5147,  0.8531,  1.0957,
         0.2893,  0.0756, -0.5094, -0.8464, -0.2414, -0.7635,  0.2957, -0.1772,
         0.1604, -0.2349,  0.1094,  0.3154,  1.3529,  0.4397,  0.5144,  0.6538,
        -0.6637, -0.6800, -1.2944,  0.2238,  0.1470,  0.1109, -0.1351, -0.3611,
        -0.0526,  1.3025,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3258,  7.3495,  0.1283, -0.6451, -0.0477, -0.1941,  0.3526,  0.2117,
        -0.3626,  0.1570, -0.3788,  0.1358,  0.2223,  0.6637,  0.0441, -0.2647,
         0.0510, -0.1549, -0.1688, -0.0259,  0.4890, -0.1596, -0.2757, -0.0153,
         0.0762, -0.1674, -0.6032, -0.0457,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1210, 37.7586,  2.9674, -0.2740,  0.9822, -0.7266, -0.3460, -0.3195,
         0.1887, -0.4894, -0.1222, -0.6399,  0.1205,  0.4309,  1.1974,  0.7091,
        -0.3718, -0.3019, -0.4270, -0.4260, -0.2200,  0.1944, -3.3479, -0.7185,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ -0.8203, -13.0261,   0.1624,  -0.2000,   1.6372,   0.2073,  -0.0596,
         -0.6132,  -0.8029,  -0.3306,   0.4738,   0.5243,  -0.3277,  -0.2132,
         -0.3928,   0.4419,   0.1904,   0.2784,  -0.3345,  -0.4091,   0.2969,
          0.3195,  -0.1900,  -0.1487,   0.1411,   0.2958,  -0.7480,  -0.1025,
          0.3968,   3.7772,  -2.9862,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8441,  5.2964, -1.2048,  0.4569,  0.5459,  1.6063,  0.7135, -0.2111,
        -0.3212, -0.1792, -0.4772, -0.6051,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1222e-01,  6.5092e+00,  9.7389e-01, -8.8656e-01,  4.3703e-01,
         1.3246e-01, -8.5628e-04, -2.0947e-01,  2.5082e-01, -1.0724e-01,
        -1.6066e+00, -9.9498e-01,  1.1252e-01, -9.1539e-01,  2.2824e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8572e-01, -5.2493e+00,  1.0746e-01,  4.8745e-01,  8.7380e-04,
        -8.6602e-02, -1.9268e-02, -3.5757e-02, -8.2573e-02, -2.1514e-01,
         3.5376e-01, -3.7521e-02, -2.2948e-01,  8.7284e-02,  1.7178e-01,
         1.2761e-02, -9.5145e-02,  5.9414e-02, -2.7411e-01,  1.2008e-01,
        -1.4843e-01, -4.5547e-02, -9.6155e-02, -5.4209e-02, -1.8475e-01,
         3.8426e-01,  1.4264e-01,  5.8332e-02,  6.0153e-02,  2.1683e-01,
        -5.2513e-02, -6.1584e-01,  1.0677e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6512, -1.8874,  0.0929,  0.0736,  0.2109, -0.0743,  0.0967, -0.3235,
         0.3282, -0.0974,  0.5115,  0.1268,  0.0796,  0.3155, -0.5646,  0.9761,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4025, -8.7976,  1.3148, -0.6278, -0.7710, -0.0163,  0.7540,  1.1025,
        -0.5195,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.4143, -13.8700,  -2.1431,  -1.4319,   1.3610,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1571e+01,  2.9816e+01,  5.9360e+00, -1.7643e+00, -6.3033e-01,
        -3.0120e-01, -1.0619e+00,  2.2187e-02,  4.4891e-01, -8.0153e-01,
         2.8298e-01,  5.5185e-01, -6.3740e-01, -1.0345e+00, -1.5179e-01,
        -3.0897e-01, -4.8701e-01, -2.2857e-01,  5.4154e-01,  5.1130e-03,
         4.1616e-01, -8.4920e-02, -1.2883e-01,  2.9209e-01,  2.1900e-01,
        -1.1667e+00, -9.1101e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6359, 22.3059,  1.6797, -0.1716,  2.7011, -2.7938,  1.6451,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9350e+00, -1.0759e+01,  1.5318e-01, -2.3982e-01, -1.9682e-01,
        -5.5030e-02, -2.3123e-01,  9.5291e-02,  4.7120e-02,  1.1158e-01,
        -1.2404e-01, -1.4753e-01,  4.3088e-02,  6.7967e-03, -1.3281e-01,
         5.8271e-02, -6.1828e-01,  6.0938e-02, -1.7878e-01, -9.0158e-01,
         3.8624e-01,  1.8793e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1990e+00, -1.0854e+01, -3.2295e-01, -1.9416e-01, -6.0817e-01,
         5.6907e-02,  2.6165e-01, -1.2165e+00, -6.9178e-02,  3.7896e-02,
        -3.0972e-01,  5.8662e-01,  4.1721e-01, -2.6421e-03,  4.1137e-01,
         2.3098e-01, -7.4628e-02,  1.6307e+00, -9.3223e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3873e+00,  1.1741e+01,  3.2138e-01, -3.3951e-01,  5.5267e-01,
         8.5773e-01,  6.8247e-01,  3.3411e-02, -1.0973e-01,  2.3174e-01,
        -2.1398e-02,  1.5382e-01,  9.2940e-02,  3.3908e-01, -1.6599e-01,
        -1.8588e-03, -5.3375e-01, -6.1769e-02,  1.6496e-01,  7.5412e-01,
         3.5832e-01,  6.0357e-01,  2.0034e-01,  2.9623e-01,  1.7748e-01,
        -1.7094e-01, -2.6389e-02, -3.0040e-01,  7.8330e-02,  1.0211e+00,
        -2.2256e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-1.1580e+00,  1.7701e+01,  2.5763e+00, -1.4806e+00, -1.2852e-02,
        -3.5684e-01,  3.2204e-01,  3.7508e-02, -3.5770e-01,  2.9084e-01,
         5.5593e-02,  2.1639e-01, -4.7361e-01, -2.5614e-01, -5.2976e-02,
        -6.6008e-02, -5.7727e-01, -1.7820e-01, -7.0774e-01,  9.9174e-02,
        -1.5122e-01, -3.7728e-02, -3.7701e-01, -7.9751e-01, -3.8671e-01,
        -6.4156e-02, -2.1498e-01, -5.7839e-01, -2.0783e-01,  1.6816e-01,
         1.1238e+00, -1.8032e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7257e-01,  6.8645e+00,  2.2410e-01,  3.7580e-02, -4.1676e-02,
         1.3355e-01,  2.6653e-01, -1.2430e-01, -1.7382e-01, -2.0909e-01,
        -5.6361e-02, -3.6141e-02,  1.4187e-02,  6.6260e-02, -1.0847e-01,
        -1.7427e-01,  1.0758e-01,  4.3865e-02, -1.9143e-03,  5.5778e-02,
         1.8674e-01, -1.0684e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1141e-02,  5.5822e+00,  5.2067e-01,  2.0434e-01, -3.4581e-02,
         2.1473e-02, -2.2792e-01,  2.9820e-02,  7.4421e-02, -9.9260e-03,
        -4.9017e-02,  5.1505e-02,  9.7264e-02, -9.1591e-02, -1.3374e-01,
        -1.1290e-01, -2.0196e-01,  5.5637e-02, -6.5070e-02, -9.5555e-02,
        -2.2030e-03, -1.1656e-01, -7.2167e-02, -9.4947e-02, -1.9070e-01,
        -1.4924e-01, -2.8617e-02,  1.2939e-01, -1.1439e-01,  2.4146e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8869e-01,  4.2218e+00, -5.1996e-04, -1.0341e-01,  1.5384e-01,
         1.3132e-01,  3.1932e-01, -1.5138e-02,  1.0305e-01, -3.2694e-01,
         1.3031e-02,  1.2681e-01, -4.2697e-02, -1.0164e-01, -7.9302e-02,
        -1.2430e-01,  6.1406e-01,  1.0060e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8475e-01, -8.9510e+00, -1.5893e+00, -4.5225e-01, -1.2623e-01,
        -2.8961e-01,  1.0633e-01, -1.1487e-01,  1.9774e-01, -4.9487e-02,
         1.9795e-01,  1.9612e-01,  1.9724e-01,  1.7721e-01,  2.4601e-01,
         1.8959e-01,  7.8019e-03,  3.7589e-01, -9.8390e-02,  6.2962e-02,
         8.8492e-02,  1.8705e-01,  2.0351e-01,  2.3057e-01,  5.2992e-02,
         9.4806e-02, -8.2568e-02, -3.3178e-02, -1.8716e-01, -1.9197e-02,
        -3.8830e-02, -3.6655e-02, -6.7400e-03, -1.8281e-02,  1.1777e-01,
         2.9652e-01, -5.7185e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-13.3411,   3.8109,   1.4474,   0.7016,  -0.6025,  -0.8716,   0.0504,
         -4.0123,  -1.7619,   0.6118,   5.5806,   3.4075,   1.7217,   0.0657,
          0.7908,   4.5676,  -1.0310,   0.3409,  -5.1636,   0.1832,  -1.8735,
          8.7161,  43.5175,   0.1642,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2572e-01,  5.1152e+00, -2.3546e-02,  1.4062e-02,  2.2044e-01,
         3.1169e-01,  3.2328e-02, -6.6174e-02, -2.8912e-02,  2.9892e-02,
         5.9698e-02,  5.7527e-03, -8.3166e-02, -1.0373e-01,  4.9168e-02,
         6.7537e-03, -5.6933e-02, -5.8177e-03, -1.1876e-02, -1.2911e-01,
        -1.1806e-01, -5.6375e-02, -7.7877e-02,  3.4093e-02, -6.7027e-02,
         1.3844e-01,  7.5314e-04,  7.2123e-02,  1.2186e-02, -3.4449e-02,
         2.8310e-01, -1.1574e-02, -3.8051e-02,  2.5712e-02,  5.4646e-02,
        -9.3703e-02, -7.5595e-02,  3.9162e-02, -4.9273e-02, -2.2787e-02,
        -3.8389e-01, -1.4147e-02,  8.3243e-03, -3.3825e-02, -3.9788e-01,
        -2.1938e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1032e-01,  5.8213e+00,  1.5080e-01,  2.8362e-01, -8.9207e-01,
         2.5082e-01, -1.8157e-01,  3.0517e-02,  1.7495e-01, -8.2033e-02,
        -1.6711e-02, -8.8994e-02,  5.7965e-01, -1.2769e-01, -1.4488e-01,
        -9.4599e-02,  1.3251e-01, -3.4556e-01, -4.0606e-02,  2.8256e-02,
        -6.0716e-02, -3.9349e-01,  1.1344e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6899e-01,  1.5827e+01, -4.4635e-01,  7.6046e-01, -2.1829e-01,
        -5.3023e-01, -4.9823e-03, -1.0222e+00,  1.6220e-01, -4.8997e-01,
         1.0024e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9060,  8.4608, -0.2219,  0.1885,  0.3761,  0.7731, -0.0154, -0.3050,
         0.1018,  0.2183, -0.1815, -0.1933, -0.3487, -0.4294,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0423,  3.9958, -0.0104, -0.0819,  0.1679,  0.1040,  0.0336,  0.0992,
         0.0909,  0.3717,  0.3329,  0.2084,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3980e+00,  1.4084e+01,  1.7732e+00,  7.4405e-01, -1.3675e-01,
        -1.7004e-01,  7.9748e-02,  4.3401e-01, -7.0990e-02, -6.5343e-02,
        -2.5308e-01,  5.5297e-02, -3.6373e-01, -2.8293e-01, -4.3658e-01,
        -1.1082e-01, -6.0288e-03, -1.7820e-01,  2.4742e-02, -2.0062e-01,
        -7.9425e-02,  6.8136e-02,  4.6621e-02,  7.3439e-02,  3.9318e-01,
        -6.2885e-01,  3.7137e-01,  3.5735e-02, -2.4352e-01,  3.4260e-03,
         5.7381e-02,  3.4244e-01, -5.2023e-02,  2.8690e-01,  5.6733e-02,
        -4.7259e-02, -1.0918e-02, -3.4197e-02,  7.8524e-02, -5.4130e-02,
        -2.2440e-02, -3.7584e-02, -5.0230e-02, -1.0426e-01, -5.8600e-03,
         1.5037e-01,  1.5203e-01,  1.6578e-02, -9.6424e-02, -6.8695e-02,
        -5.3457e-02, -6.4191e-02, -6.9006e-02, -6.5480e-01,  3.1749e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 2.4129e+00,  1.6903e+01,  1.2419e+00,  1.8296e-03,  1.0182e-01,
         7.1420e-01,  2.7025e-01, -1.5551e-01,  2.4169e-01,  4.5821e-01,
        -2.8303e-01,  4.0034e-01, -2.6889e-02, -7.8443e-01, -5.7419e-01,
        -2.8304e-01, -6.7201e-01,  5.0452e-02,  1.2109e-02,  2.7571e-02,
         2.5027e-01,  1.9757e-01, -3.3460e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0164e-01, -7.2473e+00, -8.5663e-01, -7.9013e-01,  5.6577e-02,
         2.5249e-03, -4.4546e-02, -5.4816e-02,  2.8517e-01, -1.7546e-01,
         3.2143e-01, -3.0466e-01, -1.9783e-02,  1.1289e-01,  1.0381e-01,
        -1.4192e-01,  9.1046e-01,  2.2008e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7001, -7.3719, -0.5417, -0.6039,  0.0880,  0.0401, -0.5035, -0.5733,
        -0.4115, -0.0075, -0.1227, -0.1549, -0.6044, -0.1745,  0.0933, -0.3213,
        -0.1751,  0.3583,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0820, 11.2883,  0.6733, -0.1170,  1.2423,  0.1898, -0.2090, -0.5955,
        -0.4452, -0.4820, -0.3555,  0.0984,  0.1632,  0.2504, -0.6487,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2852e-01,  1.1144e+01,  9.3252e-01,  6.4758e-01, -1.1684e-01,
         1.7132e-01,  2.5568e-01,  2.4440e-02,  1.1044e-01,  3.7849e-01,
         1.7828e-02, -1.0422e-02, -2.3728e-02, -4.5021e-02, -7.4778e-02,
         2.0217e-01,  2.8589e-01, -6.8894e-01,  7.9159e-03, -3.6988e-01,
         1.4513e-01, -9.2826e-02, -2.4450e-02,  4.4219e-02, -4.4791e-01,
         1.4584e-01,  6.7166e-02, -2.9107e-02, -1.4498e-01,  1.0271e-01,
         1.0302e+00,  3.1962e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5243e-01,  7.8636e+00, -4.7075e-01,  2.5970e-01,  1.7445e-01,
        -3.2271e-02, -6.3910e-02, -7.2046e-02,  9.4456e-02, -2.4358e-02,
        -6.7152e-02, -3.9697e-03, -1.1894e-01,  7.1799e-02, -1.9640e-02,
        -1.4920e-01, -8.6615e-02, -3.7042e-02,  2.7315e-02,  6.5203e-03,
        -7.5935e-03, -7.9273e-02, -3.3127e-01,  8.7906e-02,  1.4117e-01,
         2.1187e-02,  1.0483e-01,  1.9776e-03, -6.8031e-02,  7.6546e-02,
        -7.6667e-03,  5.8105e-02,  6.9879e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1457e+00, -1.1575e+01, -7.7948e-01, -1.1944e+00, -2.8143e-02,
        -4.7492e-02,  1.7898e-01,  7.2663e-02, -1.0189e-01, -6.4859e-02,
        -2.6010e-01, -3.1876e-01, -4.5448e-01,  2.4014e-01,  4.2639e-01,
        -9.3053e-02, -2.0003e-01, -1.1440e-01, -2.8049e-01, -2.5203e-02,
        -8.7571e-02, -1.5343e-01,  9.9303e-02, -3.9204e-01,  2.4759e-02,
         4.9257e-02, -4.4534e-04,  6.5920e-02, -1.5478e-01, -1.6736e-02,
        -1.1106e-01,  7.8803e-02, -1.6969e-01,  1.8178e-01,  2.4617e-02,
         9.9706e-02, -5.2739e-02, -1.0812e-01, -1.3366e-02, -8.1881e-04,
         7.0318e-02, -7.3638e-03, -1.0343e-02, -1.2366e-01, -1.0611e-01,
         5.0490e-02, -2.1761e-02, -5.5938e-02, -5.4407e-02, -6.3145e-02,
        -2.3888e-01, -1.0507e-02, -4.3882e-02, -7.2677e-02, -1.1358e-02,
        -3.9114e-02, -3.0859e-02, -4.8575e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.2446, -11.8841,  -0.1639,  -0.5952,  -0.1345,   0.0323,   0.2360,
         -0.1745,   0.2854,   0.1680,   0.2273,   0.0981,   0.0730,   0.4727,
          0.0708,   0.1974,  -0.1352,   0.0450,   0.2728,  -0.1799,  -0.0490,
         -0.0163,   0.0955,   0.2216,   0.1772,   0.3751,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2135e+00,  1.0557e+01,  7.2985e-02, -4.6546e-01, -1.4801e-01,
        -3.3484e-01,  2.4299e-01,  2.9484e-01, -4.4349e-02,  1.2930e-02,
         3.5494e-01,  3.6328e-02,  1.3684e-01,  3.0012e-01, -1.6674e-02,
        -2.6421e-02,  1.9758e-01, -2.4059e-02, -2.8979e-01, -6.5837e-02,
         9.6771e-04, -1.3456e-01, -4.0281e-01,  2.6018e-01,  2.8437e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6381e-01,  6.2638e+00,  4.8605e-01, -2.9869e-01,  1.1717e-01,
         1.9160e-01, -3.4566e-02, -1.1336e-01,  1.3447e-02, -3.8364e-02,
         8.9121e-02, -1.0800e-01, -5.4405e-02,  5.1725e-02, -1.3564e-01,
        -2.9427e-01, -1.3838e-01, -3.2382e-03, -6.6750e-02, -1.2731e-01,
        -2.5966e-02,  3.4138e-02, -6.8047e-02,  8.2673e-02, -7.0219e-02,
        -1.2303e-01, -4.5921e-02,  1.3201e-01,  6.9283e-02, -1.7519e-03,
        -2.1728e-02, -2.4789e-02, -6.7305e-03, -4.8815e-03,  9.3422e-02,
        -1.1665e-03,  8.6809e-02, -6.3573e-02, -3.1360e-02,  5.0016e-02,
        -2.2603e-02, -1.1261e-01,  1.1759e-01,  7.1012e-02, -1.5636e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1852, 13.0509, -0.0790,  0.6978, -0.2408,  0.2770,  0.0623, -0.1336,
         0.1285,  0.0183, -0.3177, -0.2841, -0.3354,  0.1058, -0.0814,  0.2651,
         0.5102, -0.0927,  0.1632, -0.1987, -0.0133,  0.3487, -1.1986,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0675, 14.3481,  0.2110, -1.5216, -0.1988,  1.3252,  0.3369, -0.2951,
        -0.4892,  1.3934,  2.9641, -0.4923,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([  0.6452, -12.1863,  -1.9056,  -0.0412,  -0.1418,  -1.1755,  -0.0260,
         -0.4274,   0.0434,  -0.1622,   0.4788,  -0.0549,  -0.2158,  -0.2728,
         -0.1642,   0.0465,   0.0492,  -0.1077,  -0.3937,  -0.1012,  -0.1498,
         -0.0736,  -0.3887,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0456e+00,  1.6011e+01, -2.9872e+00, -2.2358e-01,  1.1963e+00,
        -7.9566e-03,  3.9103e-02,  2.9698e-01,  5.2154e-01, -5.8305e-03,
        -3.9894e-01, -3.0098e-01,  2.0898e-01,  9.9597e-02, -7.5230e-01,
        -1.6355e-01,  4.9422e-02, -3.8103e-01, -2.8396e-03, -4.5121e-01,
         1.6501e-01, -8.7592e-01,  1.9683e-01,  1.5127e-01,  1.8992e-01,
        -3.0915e-01,  3.1185e-01, -2.9874e-02, -6.3464e-01, -5.1545e-01,
         2.2812e-01,  4.8026e+00,  1.9452e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5471e-01, -8.6546e+00,  9.4368e-02,  5.0850e-01, -8.3015e-03,
         1.8543e-01,  3.6520e-01,  1.4773e-01, -2.7358e-01,  1.9144e-01,
        -8.6116e-02,  7.8365e-04,  3.1172e-01, -1.4316e-01,  1.3984e-01,
         1.5871e-01,  8.7582e-02, -1.5024e-02, -7.8228e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.9687, -11.1201,   0.2403,  -0.5939,  -0.0288,  -0.7793,  -0.2371,
         -0.3096,   0.1677,  -0.1538,  -0.4774,   0.0767,   0.0676,  -1.2621,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.1038, -15.4447,  -1.2572,   0.3985,   1.7861,  -0.4292,   0.0645,
          0.2772,   0.3557,  -0.0294,  -0.2599,   0.1763,   0.0874,   0.6099,
          0.2587,  -0.0545,   0.0978,   0.4405,   0.2422,   0.1720,  -0.4882,
          0.1652,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3382e-01,  8.9295e+00,  1.0149e-01,  2.3092e-01,  3.6182e-01,
         1.2880e-01, -2.2431e-01,  2.9262e-02, -1.1566e-01,  5.8241e-02,
        -1.7009e-01, -2.7927e-01, -2.8341e-01, -1.4602e-01, -1.5253e-01,
         5.2686e-02,  6.4473e-01,  7.9043e-03, -7.3280e-02,  6.3072e-02,
        -3.3191e-02, -1.7605e-02,  4.5283e-02,  1.3565e-02, -1.5940e-01,
         5.3263e-02, -7.2934e-02, -7.9221e-02, -1.1053e-01, -5.5276e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2431,  6.8152,  0.0377, -0.3835, -0.1260,  0.1625, -0.3245,  0.0928,
         0.1826, -0.0317,  0.0518,  0.1205,  0.1005, -0.3860,  0.1391, -0.3408,
        -0.5995,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1461, -0.3784,  4.2576,  1.1733,  0.5629, -0.4291, -3.3991, -2.3704,
         0.8953,  5.7092,  6.1860,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.8162, -14.7572,  -2.6375,  -0.5247,  -0.0217,   0.2394,   0.2726,
         -0.3339,  -0.2384,  -0.4372,  -0.0799,   0.0949,  -0.1514,   0.0196,
         -0.7880,   0.3256,   0.4259,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3072e+00,  1.3192e+01,  2.3315e+00,  8.3201e-01,  3.8284e-01,
         6.3023e-02,  2.2682e-01,  4.4686e-03, -7.6506e-02, -2.9742e-01,
         1.0801e-01,  2.5851e-01, -1.4767e-01,  4.7643e-01,  3.1178e-01,
        -1.8283e-01, -2.2211e-02, -3.1733e-01,  5.5513e-02, -5.4306e-02,
        -1.5572e-01, -4.5494e-01,  1.6361e-01, -2.0632e-01, -2.0599e+00,
         2.2684e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7712e-01,  1.2029e+01,  1.0166e+00, -1.0225e-01,  1.6968e-01,
         2.7873e-01, -4.1576e-01,  7.3908e-01, -4.5180e-01, -3.2993e-04,
        -7.9841e-02, -9.1441e-02,  2.7092e-02,  1.4957e-01, -2.5496e-01,
         1.5225e-01,  2.5848e-01,  1.0579e-01,  2.7517e-02,  2.7203e-01,
        -1.5636e-01, -1.1597e-01, -3.8066e-02,  4.7853e-02,  1.3830e-02,
         1.8919e-01, -5.1122e-02, -3.0370e-01,  6.0124e-02, -6.6623e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4056, -7.5339,  0.4068,  0.3898,  0.1038, -0.0587,  0.0782,  0.0742,
        -0.3675,  0.1593, -0.2046,  0.5717, -0.0695,  0.1068,  0.3926, -0.0248,
         0.2845, -0.4942, -0.0334,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([-5.5393e-01,  1.3083e+01,  8.2411e-01,  2.7649e-01,  3.3076e-01,
        -2.9466e-01,  1.6186e-01, -1.5851e-01,  1.0209e-01,  1.0334e-01,
        -2.0005e-02, -9.7271e-02,  1.9628e-02, -4.3085e-02, -1.6975e-01,
        -3.6475e-02, -1.4575e-01,  3.2127e-01, -8.7443e-02, -3.0120e-02,
        -7.8676e-02, -6.6620e-02,  1.0066e-01, -5.2549e-03, -8.8937e-02,
         3.8166e-01,  6.5998e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1944, -8.6075,  0.1462,  1.2919, -0.7818, -0.5296, -0.6197, -1.1494,
         0.1791, -0.3840,  0.8968,  0.2403, -2.4411,  0.0135,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3216e-02, -7.1824e+00, -5.8979e-02, -2.2001e-01, -2.5162e-01,
        -1.8414e-02, -1.4250e-01,  2.8583e-02,  1.3433e-01,  2.6079e-01,
         7.3432e-02, -2.9149e-03,  3.1161e-02,  1.0247e-01, -1.0323e-01,
        -8.6608e-03,  7.3063e-02,  1.1472e-01,  2.1166e-01, -2.2034e-02,
         5.8022e-02,  2.6490e-01,  2.9455e-02,  7.3418e-02, -1.2506e-02,
        -1.0884e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.0857, -10.4604,   0.2207,  -0.0343,   0.0844,  -0.0417,  -0.1014,
          0.0935,  -0.1097,  -0.2332,   0.2440,  -0.6187,  -1.0740,  -0.1435,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5123,  8.6183,  0.1971, -0.0442, -0.1090, -0.0330, -0.2539, -0.2419,
         0.0240, -0.1064, -0.1701, -0.2708, -0.0863, -0.2798, -0.1224, -0.0522,
        -0.3035, -0.1028,  0.0190, -0.0972, -0.1088,  0.0705, -0.2046, -0.1258,
        -0.1283, -0.0259, -0.0695,  0.1100, -0.0493,  0.0704, -0.2076,  0.4648,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0245, 14.9414,  0.2970,  0.1671, -0.1287,  0.4162, -0.1556,  0.1409,
        -0.0473, -0.1779, -0.2322,  0.1290, -0.1037, -0.0833,  0.0864,  0.0249,
        -0.1052, -0.5994,  0.1361,  0.1749, -0.0700, -0.0616, -0.2336, -0.8180,
         0.3149,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0697, -8.6438, -1.3961, -0.2852, -0.0242, -0.3608,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4903e+00,  2.0689e+01,  1.1014e+00, -1.0114e+00,  2.3718e-02,
         5.3545e-03, -3.0291e-01, -2.1280e-01, -7.0092e-02, -8.2011e-02,
        -3.4674e-01,  3.4800e-01, -1.9677e-01,  3.0864e-02, -6.4356e-01,
        -5.1751e-01,  1.6355e-01,  1.3003e-01, -1.0139e-01,  2.4268e-02,
        -3.4166e-01, -4.4595e-01, -1.5610e-01, -1.7585e-01, -1.2570e-01,
        -3.2782e-01, -1.8872e-01,  1.1244e-01,  1.1136e-04,  1.0621e-01,
         5.8010e-02,  5.5025e-02,  2.1018e-01, -6.6650e-02, -2.6033e-01,
        -2.5155e-01,  3.1603e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1218e-01, -9.5480e+00, -6.1919e-01, -5.5102e-01, -3.0108e-02,
         5.3048e-02, -4.3003e-02,  2.0398e-01, -4.7926e-02,  5.3764e-02,
         1.6486e-01, -5.8134e-03, -1.4390e-01,  1.0849e-01, -5.9065e-02,
         8.9284e-02,  6.6312e-02,  1.2924e-01, -9.0515e-03,  2.9698e-02,
         2.6452e-01,  5.4408e-03,  1.2196e-01,  1.6068e-01,  5.4950e-02,
        -2.1453e-02, -1.1958e-01, -7.1653e-02,  6.8735e-02, -1.2908e-02,
         2.3793e-02, -5.0655e-02, -8.4754e-02,  2.0823e-01,  1.9992e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0478e+00,  1.6615e+01, -6.1827e-01, -4.6656e-01,  2.0256e-01,
         6.8317e-01, -6.0466e-01, -1.1259e-02, -1.0908e+00, -6.3122e-01,
         1.3072e-01,  5.3482e-02, -2.0152e-01,  1.6768e+00, -1.1116e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1726e-01, -8.9784e+00, -4.0026e-01,  1.9050e-01, -8.7633e-03,
        -2.8895e-01, -9.4963e-02, -2.2865e-01,  1.6799e-01,  8.0166e-02,
         9.9326e-04,  1.6673e-01,  6.1242e-02,  8.0821e-02, -7.7705e-03,
        -2.0448e-02, -3.2141e-02,  2.4769e-01, -4.1367e-02, -7.3062e-03,
         2.7459e-02,  2.6861e-01,  5.2412e-02, -3.2350e-02, -2.7680e-02,
        -6.9324e-02,  5.3057e-02, -7.5565e-02,  1.4012e-01, -6.0674e-02,
         6.8871e-02,  2.1691e-01, -6.9818e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3281e-02, -1.3009e+01,  3.5643e-01,  6.7179e-01, -2.4720e-01,
         7.5986e-01,  3.2282e-01, -1.2192e-01,  4.8223e-01, -1.1951e-01,
        -4.4632e-01, -4.5006e-01,  2.4579e-01,  5.6310e-01,  4.8992e-01,
         1.5746e-02,  2.1668e-01,  2.2303e-01,  1.0510e-01, -2.5577e-01,
         3.0784e-01,  2.2917e-01,  8.3716e-02, -1.0913e-01, -3.5242e-02,
         1.0119e-02,  7.8626e-02,  1.1220e+00, -4.1340e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.4104,  9.0130, -0.1791, -0.5560,  0.3731, -0.1317, -0.2591, -0.1019,
        -0.2441, -0.4928, -0.0547,  0.0161, -0.5105, -0.1530, -0.8108,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9134e-01,  5.3902e+00,  1.0870e-01, -5.6142e-02, -4.6217e-02,
        -1.1197e-01,  2.7358e-02,  3.2041e-01,  1.2504e-01, -1.9229e-01,
         1.9870e-01,  6.7655e-02,  4.5010e-02,  8.0460e-03,  1.1637e-01,
        -3.9060e-02,  5.5902e-02,  3.4502e-01, -3.2691e-03,  1.0468e-02,
         6.5262e-02, -5.8965e-02,  5.7866e-02, -2.6544e-01,  6.8327e-02,
        -1.6919e-01, -2.2485e-02,  1.0483e-01,  5.8561e-01,  4.0239e-02,
         2.3654e-02, -5.3400e-02,  2.3030e-01, -2.6252e-02,  2.8433e-02,
         1.1059e-01,  1.3231e-02,  2.1934e-02, -1.0531e-01,  2.9267e-02,
        -1.1249e-02, -2.7785e-02,  1.0910e-02,  1.1997e-01, -1.2929e-01,
         1.1277e-03, -5.8308e-02, -1.5650e+00,  1.7571e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9010e+00, -3.0370e+01,  7.2620e-01,  3.0383e+00,  1.1890e+00,
         2.8079e-01, -1.4450e+00,  4.5605e-01,  7.7863e-01,  1.5614e+00,
        -2.4511e-01,  6.7077e-01,  2.0061e-01, -1.3480e-01, -6.0262e-02,
         4.9436e-02,  2.7963e-01, -4.6424e-02, -3.5229e-01,  5.8500e-01,
         3.9112e-01,  1.1755e-01, -1.4305e-02,  4.4371e-01,  2.0206e-01,
         2.8535e-01, -1.6912e-01, -3.2067e-01,  1.6950e-02,  7.9257e-01,
        -2.8473e-01, -2.3165e-01, -2.6284e-01,  2.1150e+00,  3.2351e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0293, 14.1689, -0.1347,  0.3993,  0.1873, -0.3269, -0.7348, -0.4180,
         0.3115, -0.1729,  0.0692,  0.0885, -0.1874,  0.1697, -0.1143,  0.3676,
        -0.3079,  0.4447, -0.1914,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8682e-01,  1.8439e+01, -1.5948e-01,  9.9525e-01,  2.6364e-01,
         1.3019e-01,  2.0756e-03, -2.0250e-02,  4.5740e-01,  3.3326e-01,
        -6.7159e-01, -2.6526e-01,  6.6804e-01, -1.0051e+00, -9.0869e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6315e-01,  5.9204e+00, -3.7973e-01,  1.1179e-01,  4.4749e-02,
        -9.8043e-02,  4.7473e-01,  1.2458e-01, -1.3404e-03, -1.6331e-01,
        -1.2666e-01, -1.7440e-04, -1.5634e-01, -1.0231e-01, -2.8369e-02,
        -4.7150e-02, -1.2771e-01, -4.0673e-02, -2.4987e-02,  2.4705e-02,
         2.1174e-01,  2.6757e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2747e-01,  5.9826e+00,  2.4472e-01,  2.2492e-01, -1.8746e-02,
        -8.7711e-02, -2.6818e-01, -8.0633e-02, -1.6023e-01,  1.1367e-01,
        -1.3753e-02,  7.7468e-02, -4.7312e-03, -6.1404e-03,  4.1378e-02,
        -6.8107e-03, -1.4008e-01, -1.4002e-01,  1.1827e-02,  3.8991e-02,
         6.3615e-02,  1.4328e-01,  3.3190e-02, -5.6610e-02, -2.0448e-01,
         8.0864e-02,  1.9598e-01, -4.5430e-02, -5.8094e-02,  1.3985e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5651,  5.2115, -0.6465, -0.4010,  0.1065,  0.0725, -0.0900,  0.0891,
        -0.4176, -0.6873, -0.3692,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.8148, -24.7892,  -1.1094,   0.2465,   1.2508,  -0.1015,  -0.5477,
          0.4355,   0.6958,   0.4868,   0.9064,  -0.4553,   0.5548,   0.1121,
         -0.1011,   0.2026,  -0.6196,   0.4459,   0.0450,   0.0604,   2.2866,
          1.7554,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5996e+00, -3.1450e+01, -3.9679e+00, -8.6430e-01, -6.4024e-01,
        -8.4857e-01, -7.8268e-01,  2.9627e-01, -3.5983e-02, -6.2883e-02,
         3.4412e-01,  1.2411e+00, -7.6386e-01, -5.0617e-01, -8.4784e-02,
         7.5786e-02, -3.9367e-01, -3.0787e-02, -3.4607e-01, -1.8144e-02,
         2.3121e-01, -1.4856e-02,  7.4505e-01,  2.3713e-01,  1.9854e-01,
        -1.3446e-01, -2.2117e-01,  4.6841e-01, -9.7229e-02,  9.0991e-02,
        -7.3539e-02,  2.1859e-01, -1.0999e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.4783, -23.8565,  -2.2438,   0.3797,   1.0264,  -0.7018,   1.0263,
          0.0489,  -0.1591,   0.6903,   0.3141,   0.2781,   0.0725,   0.0642,
          0.2657,  -0.3549,  -0.3668,   0.5531,  -0.1045,  -0.0409,  -0.2466,
          0.0388,  -0.2046,   0.4158,  -0.1770,  -0.4084,   1.0068,   0.7625,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2348e+00,  3.1615e+01,  4.6534e+00,  9.5415e-01,  4.7993e-01,
         4.0396e-01,  5.7016e-01,  1.0328e+00, -1.3253e+00,  3.9843e-01,
        -7.0906e-01,  4.5467e-01, -2.5973e-02, -6.2497e-01, -2.7955e-02,
        -2.9164e-01, -2.9908e-01, -3.4545e-02, -2.3999e-01,  1.8743e-01,
        -1.7504e-02,  3.4833e-01,  4.6585e-02, -7.2568e-01, -1.1685e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.6581e-01,  1.4860e+01,  5.3416e-01,  1.7131e+00,  9.7613e-02,
        -2.7822e-01, -5.8222e-01, -1.0863e-01,  1.7556e-02, -2.1589e-02,
         8.1604e-02, -2.2747e-02, -1.4328e-01, -7.0015e-01,  2.7138e-02,
        -5.6762e-01, -3.8572e-01, -2.5571e-01, -1.1717e-01,  2.8310e-02,
         1.7064e-03,  5.8780e-02,  5.8352e-02,  2.6456e-01, -9.2308e-02,
        -2.7347e-01,  1.8676e-01, -8.2592e-02, -1.9199e-01, -1.5173e-01,
        -1.4013e-01, -1.3664e-01,  4.3388e-02, -1.0392e-01,  7.3425e-02,
        -1.7605e-01, -1.2419e-01, -1.0481e-01, -1.5320e-01,  2.7785e-01,
         5.3119e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1720, 22.6469,  0.2095, -0.2770,  0.7957, -0.2211,  0.1570,  0.1718,
         0.0869,  0.0530, -0.6269,  0.2807,  0.2557, -0.0557,  0.0983,  0.0454,
        -0.2647, -0.0620,  0.0285,  0.0988, -0.3723, -0.9849, -0.2037, -0.3738,
         0.1269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7688e-02,  2.2382e+01,  3.5101e+00, -7.2009e-02,  8.2367e-01,
        -1.7586e-02, -7.8437e-01,  1.2472e+00,  4.8380e-01,  3.6573e-02,
         2.8390e-01, -3.7775e-01, -3.3667e-01,  4.0249e-02, -2.0654e-01,
         9.7409e-02, -2.5561e-02, -1.7784e-01, -9.1657e-01,  1.8338e+00,
         9.8640e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4948e-01,  1.8081e+01, -6.4702e-02, -2.6952e-01, -7.7381e-01,
        -2.7987e-01, -7.1460e-02, -1.5031e-01, -2.7759e-01,  1.8532e-01,
         1.9093e-02,  6.5849e-02, -2.4123e-01, -4.1943e-01, -4.3940e-01,
        -6.8471e-01, -1.4065e-02,  3.4132e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4191e-01,  7.7556e+00,  4.8606e-01, -1.1345e-01,  1.9740e-01,
        -4.0133e-02, -4.9613e-02, -1.7100e-01, -4.5805e-01, -7.6247e-02,
        -4.3803e-02,  5.5116e-02, -8.3669e-02,  4.2832e-02,  5.7494e-02,
        -3.0630e-02, -4.1659e-01, -1.3989e-01, -1.1550e-01,  1.0760e-01,
        -6.6666e-02, -2.4793e-02, -4.8207e-02, -4.6201e-02, -5.3182e-02,
        -1.4752e-01, -1.0807e-01,  9.1302e-02,  4.9195e-02,  8.0414e-02,
         6.0435e-03,  5.5328e-02,  5.7901e-02,  1.7828e-02, -2.0122e-01,
         9.6063e-02, -3.4940e-01,  3.1751e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5512e-01, -1.1177e+01,  4.8718e-04, -3.2951e-01, -7.3721e-02,
        -1.4018e-01,  3.4998e-01, -1.6345e-01, -5.2905e-01,  2.7679e-01,
        -2.5541e-01,  6.4971e-02, -1.4588e-02,  1.2754e-01, -3.5552e-01,
        -9.4855e-02, -7.8968e-02, -5.7489e-02,  1.5234e-01,  1.0706e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1275, 21.7561,  1.3633, -0.4397, -0.6671,  1.0023,  0.1045,  0.1081,
        -0.1804, -0.3021,  0.3799, -0.1648, -2.6740, -0.1112,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4584e+00,  1.3166e+01, -1.1452e+00, -3.4556e-01, -7.0445e-01,
        -1.7807e-01,  2.7419e-01,  1.2482e-01,  2.2610e-01,  7.1740e-01,
         2.7828e-01,  5.0193e-02,  1.7890e-02, -8.6762e-02,  5.8507e-01,
        -3.1065e-01, -8.1259e-02, -2.9184e-01, -1.2413e-02, -2.1510e-01,
         1.8547e-01,  1.2713e-01,  1.0312e-01,  3.3425e-01, -5.6634e-02,
        -1.3828e-01, -7.8989e-02, -1.2708e-01,  1.0973e-01,  7.5214e-02,
        -3.1710e-01, -4.2454e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6957e+00,  2.0481e+01, -1.3184e-02,  8.5556e-01,  1.0913e+00,
         4.1059e-02,  5.3099e-01,  8.5998e-01,  9.3260e-02, -8.4823e-01,
        -9.6679e-02, -5.7116e-01,  1.3686e-01,  3.9864e-01, -4.2885e-01,
         3.2817e-01,  1.7853e-01, -1.0455e-01, -2.8300e-01,  2.2053e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2275e+00,  3.3525e+00,  2.5424e-01,  2.2395e-01, -3.6114e-02,
         1.8886e-01, -7.6583e-03,  1.0295e-02, -4.0915e-02,  7.2977e-03,
         2.0420e-02,  3.2987e-02,  5.2507e-03,  1.6038e-02,  6.9536e-03,
         3.5256e-02, -1.3282e-02,  2.5614e-02,  4.8636e-03,  5.5314e-03,
         2.8011e-02,  5.4320e-01, -9.4304e-02, -6.3132e-02, -5.5475e-02,
         8.2361e-03,  1.3738e-03,  5.2312e-02, -4.5394e-02, -2.8948e-02,
        -7.5768e-02,  1.7344e-02,  6.3753e-02, -9.8630e-03,  1.0650e-02,
         1.4979e-02, -3.5481e-02,  2.6244e-02, -3.0633e-02, -6.2680e-03,
        -1.0363e-01,  3.4744e-02,  8.0430e-02,  1.5681e-03,  1.2824e-01,
        -3.2417e-01,  3.9335e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0925e+00, -2.4011e+01, -3.4820e-01,  5.2310e-02, -2.2300e-01,
        -1.8420e-01,  2.7560e-02,  9.6649e-02,  1.2904e+00, -4.3300e-02,
         9.0994e-01,  2.4776e-01,  4.0506e-01, -5.0853e-02, -1.8612e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1839e-01,  9.8181e+00,  1.2846e+00,  1.5987e-01,  6.9053e-01,
         1.7183e-01, -2.1119e-01,  1.4065e-02, -6.9630e-02,  9.8746e-03,
        -1.3429e-01, -7.4310e-02, -4.1135e-02, -5.8539e-02, -1.5156e-01,
        -6.6129e-02, -2.1829e-02, -4.4683e-03,  1.3121e-01, -1.0560e-01,
        -8.1703e-03, -1.1427e-01, -5.0726e-02,  6.7386e-02, -4.2306e-02,
        -1.3146e-01, -1.1503e-01,  5.2828e-01, -1.6821e-01,  8.1253e-02,
         8.6832e-02,  1.0408e-02,  6.9920e-02, -1.2916e-03,  9.0489e-02,
         4.7428e-02, -5.8175e-02, -2.7904e-02, -5.0774e-02,  1.9613e-02,
        -3.4137e-02, -4.6695e-02, -8.2747e-02,  7.7450e-03,  5.2876e-02,
        -5.5665e-02,  1.1306e-01, -6.2915e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-5.4252e-02, -3.4042e+01, -3.0985e+00, -1.6129e+00, -1.0612e+00,
        -3.7421e-01,  2.8309e-01,  7.7743e-01,  3.5312e-01, -7.4789e-01,
         2.2011e-01,  1.0143e-01,  6.9394e-01, -2.8041e-02,  4.6673e-02,
         1.3494e-01,  7.4135e-01, -1.6044e-01,  8.0968e-01, -1.5292e-01,
        -1.4861e-01, -1.1424e-02, -4.5690e-01,  4.4045e-01,  1.2135e+00,
        -8.5779e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.0212, -10.2981,  -1.2867,  -0.4398,  -0.6171,   0.1735,   0.3757,
         -0.3369,   0.0707,   0.0791,  -0.4175,   0.2018,   0.1077,   0.0790,
          0.2481,   0.1186,   0.0358,   0.1691,   0.4855,  -1.0826,   0.2913,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6943e-01,  7.2477e+00,  4.6785e-01, -3.5783e-01,  2.0297e-01,
        -4.1159e-01, -2.1547e-01, -2.2406e-01, -2.1472e-01, -2.3650e-01,
        -5.2903e-02, -3.8346e-03, -9.0693e-02,  7.2136e-03, -1.3192e-01,
        -2.4034e-01, -2.0480e-01, -2.1140e-01, -1.6157e-01, -3.5332e-02,
        -3.9183e-02, -5.0012e-02, -1.3135e-01, -1.2623e-02,  2.6744e-01,
        -2.4190e-01, -7.5452e-02,  2.6976e-02,  3.6091e-03, -4.2406e-02,
        -3.1678e-01, -1.4985e-01,  2.9694e-02,  3.2225e-02, -3.2815e-01,
         1.4844e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8070e+00, -2.6173e+01, -5.7410e-01, -1.2697e+00, -1.1737e+00,
        -1.5118e-01, -7.9037e-01,  1.4310e-01,  5.4936e-01,  3.2419e-01,
         5.5671e-01, -1.1864e-01,  1.2366e-02,  1.8600e-01, -8.8428e-02,
        -5.3385e-01,  1.8245e-01, -6.1503e-02, -8.2276e-01,  1.6457e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.7407, -12.2139,  -0.8492,  -0.0487,  -0.7005,   0.2354,   0.0335,
          0.0912,  -0.1876,   0.3349,  -0.2293,  -0.1810,   0.3489,   0.4483,
         -0.0546,   0.1547,   0.4325,   0.1390,  -0.1462,   0.1316,   0.4960,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.5996, -11.3678,  -1.0300,   0.4733,   0.2824,   0.3137,   0.4747,
          0.6235,  -0.1035,   0.2697,  -0.0591,   0.0205,   0.2537,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2375e+00,  1.4830e+01,  1.6592e+00,  4.0725e-02,  2.9323e-01,
        -4.9348e-01, -1.0770e-01, -1.2776e-01, -1.3835e-01, -8.2458e-02,
         5.4479e-01, -1.1386e-01, -9.0891e-02, -2.0627e-01,  1.8486e-01,
         8.3912e-02, -5.6543e-01,  3.9891e-01, -7.7464e-02, -2.7407e-02,
        -1.5080e-01,  1.4964e-01, -1.4476e-02,  2.4997e-01,  4.1206e-02,
         1.1198e-01, -2.4976e-01,  9.1232e-02, -1.2046e-02, -4.6867e-02,
        -1.5770e-02,  4.9383e-02,  1.0222e-01, -1.6924e-01,  3.1550e-01,
        -9.6180e-02,  4.6061e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2386,  5.5937, -0.1585,  0.1534, -0.0089,  0.0128,  0.0178,  0.1633,
        -0.0704, -0.1545, -0.0449, -0.0144, -0.1733, -0.0737, -0.0638, -0.0431,
         0.0467, -0.0120, -0.0453,  0.0731,  0.1385,  0.0505,  0.1103,  0.2059,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6954e+00,  2.6020e+01,  2.5254e+00, -4.6715e-01,  5.4544e-01,
         5.2064e-01, -2.1092e-02, -3.2478e-01,  3.2049e-01, -1.6306e-02,
        -6.5669e-01,  1.9016e-01,  8.7180e-02,  1.2176e-01,  2.6906e-01,
         2.2094e-01,  1.0054e-01, -1.9526e-01, -1.6757e-01, -4.3119e-02,
         1.9146e-01, -3.2442e-01,  3.0983e-01,  1.1793e-01, -3.6901e-01,
         1.4913e-01, -4.0368e-02,  1.0197e-01,  1.3213e-01,  5.5832e-02,
        -6.3505e-01, -2.9901e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5268e+00,  2.0801e+01,  1.6538e+00,  4.3387e-01,  1.2886e+00,
         3.7067e-01,  3.8202e-01, -2.8475e-01,  4.8487e-01, -3.0698e-01,
        -2.6504e-01, -2.7790e-01, -2.7813e-01, -4.2298e-02,  8.3871e-03,
        -7.7496e-02, -7.1956e-01, -2.7041e-01,  1.2999e-01,  2.1451e-01,
         8.8240e-01, -8.1539e-01, -2.0306e-01,  2.5853e-01,  1.1705e-01,
        -2.6449e-01, -7.6316e-02,  2.8702e-02, -2.9942e-01,  8.8433e-01,
         6.5583e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.3996, -20.8201,   0.9101,   0.1784,  -0.9933,   0.2620,   0.6946,
          0.6091,  -0.1352,   0.1737,  -0.1982,  -0.0312,   0.1702,   0.5486,
          0.2030,   0.1088,  -0.7590,   0.4137,  -0.0858,   0.3012,   0.1541,
         -0.3108,  -1.3284,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3751, 10.1736, -0.2147, -0.4150,  0.6553,  0.1225,  1.0903, -0.4854,
        -0.5705,  0.2735, -0.1703, -0.9888,  0.0643, -1.1052, -0.1526,  0.4595,
         1.7069,  0.2047, -0.5093, -0.2355,  0.2185, -0.0482, -0.1030,  0.2388,
        -0.1797, -0.2524,  6.5597, -0.0923,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-2.0670, 11.7751,  0.2779,  0.1437,  0.4856,  0.3517,  0.2646,  0.0203,
         0.0303, -0.3626,  0.1259, -0.1760, -0.0154, -0.0162, -0.0627, -0.2923,
         0.2096,  0.4341, -0.0548,  0.0328,  0.1012, -0.0555, -0.2753, -0.0917,
         0.1756,  0.1706,  0.3481,  0.6306, -0.4664,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5622e-01, -2.6814e+01,  1.0305e+00,  4.5734e-02, -6.3959e-01,
        -1.1004e+00, -1.8099e-01,  3.7265e-01,  2.0003e-01,  1.6484e-01,
        -4.2218e-01,  1.4102e+00,  2.5385e-01,  2.6290e-01,  2.6234e-01,
         4.5485e-01,  1.0443e-02,  3.3134e-01, -3.6744e-01,  5.9769e-01,
         8.0387e-01,  1.2611e+00,  8.5247e-01, -1.3105e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2679,  8.9280,  0.8398,  0.6542, -0.0504, -0.0149,  0.0144,  0.3688,
         0.0478, -0.1606, -0.2028,  0.3968,  0.0511,  0.0543, -0.0858, -0.1617,
        -0.0342,  0.0994, -0.0373,  0.1289, -0.0580,  0.1097, -0.0855,  0.0232,
        -0.0155, -0.1626, -0.1030, -0.0407, -0.1093, -0.1493, -0.1778,  0.0492,
        -0.5923, -0.2812], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7291, 37.5063,  3.0390, -1.5627,  0.9275,  1.1446,  0.2207,  0.3298,
         0.5374,  0.5974, -0.2943, -0.5306, -0.1134,  0.1884,  0.1825, -0.0414,
        -0.6828, -0.3745,  0.1008, -0.7741,  0.0433,  0.4905,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0105e-01,  8.5707e+00,  6.0984e-01,  1.4447e-03,  3.3402e-01,
        -2.1040e-01, -1.3113e-01, -1.5519e-01, -3.4058e-01, -1.5454e-01,
         5.0876e-01, -2.1523e-01, -2.4104e-01,  3.9761e-02, -9.1563e-02,
        -6.2176e-02, -6.0854e-01, -4.2688e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1029,  0.0220,  0.0433,  0.0243,  0.0025,  0.0434, -0.0848, -0.0144,
         0.0273, -0.0814, -0.0075, -0.0067, -0.0208,  0.0341, -0.0468,  0.0413,
        -0.0680, -0.0120, -0.0411, -0.0205, -0.0262,  0.2650, -0.0013,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3055,  1.1348, -0.2391,  0.5417,  0.0879, -0.9483, -0.3779, -0.2054,
         0.2438, -0.7647,  0.5065, -0.2570,  0.2088,  0.4451,  0.2355, -0.1157,
         0.1567,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4139, 18.6010,  2.6514, -0.1824,  0.3333,  0.3796,  0.4255, -0.2056,
        -0.0575, -0.0314,  0.4558, -0.3861, -0.0907, -0.0967,  0.1780,  0.6036,
         1.5845, -0.8269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7204e-01,  1.2465e+01, -8.9336e-01, -3.5312e-01,  2.9343e-02,
        -2.8235e-01, -7.7937e-02, -3.5566e-01,  4.1512e-02,  6.0984e-02,
        -2.0203e-01,  2.5487e-01,  4.4317e-02,  4.7166e-03, -7.6643e-02,
        -1.9242e-01, -1.4320e-01,  6.3753e-02,  2.4713e-01, -3.7694e-01,
         1.3923e-01, -1.0670e-01,  7.7960e-02, -5.1337e-01, -4.6428e-01,
        -2.8798e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3504,  3.9491,  0.2066, -0.1361, -0.0625, -0.0886,  0.1007, -0.1034,
        -0.2094, -0.1249, -0.1855, -0.0100,  0.0367,  0.0646, -0.0260, -0.0344,
        -0.0702, -0.0342, -0.0103, -0.0129,  0.0972,  0.0279, -0.0086, -0.0105,
         0.0089, -0.0284, -0.0927, -0.0596,  0.0707, -0.2048,  0.1673,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0968e+00, -3.3686e+01, -2.3890e+00,  8.9757e-02, -3.4037e-01,
        -9.2518e-01, -1.2064e+00, -9.6845e-01,  1.9058e-03,  1.6069e+00,
         3.7807e-01, -1.8845e-01,  1.4034e+00, -4.7058e-01,  6.7258e-01,
        -3.5004e-01, -6.1346e-01, -9.9555e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4023, 19.1962,  1.1606, -2.3581, -0.0733,  1.1018, -1.6987, -0.1213,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.9855e-01,  2.7781e+00,  1.6423e-01, -9.2330e-02, -1.4637e-01,
         1.2633e-01,  2.4996e-02,  1.0682e-02, -2.7144e-02,  4.8665e-02,
         2.2957e-03,  1.2976e-02,  4.6816e-03,  3.1563e-02,  4.2689e-02,
         9.0399e-02,  5.1147e-02, -1.4926e-02,  2.4929e-02,  1.2937e-02,
         3.7821e-02,  2.3210e-02,  2.6456e-02, -1.2374e-02, -8.2857e-03,
        -2.1796e-02, -1.4784e-02, -6.6790e-03, -9.7631e-03,  4.8179e-02,
        -1.3217e-02, -2.5558e-03, -9.9453e-03,  3.7654e-03, -3.3592e-02,
        -8.4216e-02,  6.9320e-03, -3.4777e-02,  1.9328e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0863e+00, -2.6777e+01,  1.7743e+00, -3.2405e-01, -8.8156e-01,
         2.0679e-01, -2.1746e-01, -8.3351e-02, -8.9164e-02, -1.5994e-02,
        -1.8506e-01, -1.0465e-01, -1.3304e+00, -1.3021e-01, -4.0929e-01,
         8.6077e-02,  1.5131e-01,  7.5950e-01,  1.4199e-01,  4.0985e-01,
         3.7387e-02,  1.9825e-01,  2.2038e-01,  6.7659e-02, -8.4184e-01,
         8.4848e-01, -7.1850e-01, -8.6456e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5768, -7.5747,  0.3799,  0.2240,  0.1990,  0.1939,  0.0928, -0.0329,
         0.0113,  0.0110, -0.4526, -0.9036,  0.5947,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.4017, -23.8751,   3.6360,   0.5519,   0.2076,  -0.0938,   0.0330,
         -0.0912,   0.5245,   0.4966,   0.1638,   0.4049,   0.6924,   0.9163,
          0.8922,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4061e-01,  4.3643e+00,  1.7009e-01, -4.5416e-01,  1.6894e-01,
        -7.6535e-02, -4.2589e-02, -7.0682e-02, -2.7044e-01, -5.4737e-02,
         5.9167e-02,  2.0002e-02,  3.2559e-02,  1.0752e-01,  8.7030e-02,
         4.8641e-02,  3.8852e-02, -1.5889e-02,  9.4704e-02, -4.4042e-02,
        -2.0368e-02, -4.2830e-02,  1.2075e-02, -4.5466e-02, -5.7866e-02,
         1.9182e-03, -3.7521e-02, -8.0183e-03, -3.6861e-02, -1.1408e-02,
         6.4788e-03, -1.3938e-02, -1.3521e-02,  8.0662e-03,  8.4776e-03,
        -3.5253e-02, -3.5773e-02, -4.3844e-02, -3.5839e-03, -6.7529e-02,
        -8.1060e-02, -2.3307e-02, -4.5358e-02, -2.9691e-02, -8.5394e-03,
         6.7057e-02,  7.9409e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9462e-01,  1.5236e+01,  5.2972e-01,  4.4375e-01,  5.8300e-03,
         1.1904e-01,  4.9376e-02, -5.5663e-02,  2.6632e-01,  1.1427e-01,
         4.6407e-02,  5.3798e-02, -5.6874e-02, -1.1517e-01, -7.0074e-02,
        -1.2547e-01,  1.4071e-02, -2.9828e-02,  2.4998e-02,  3.1954e-02,
        -2.0838e-02, -4.4956e-02,  2.2574e-01,  1.1546e-01,  9.0716e-02,
        -1.3048e-02,  2.2630e-01,  2.0076e-02, -1.9912e-01, -4.4185e-02,
         9.6793e-02, -2.3136e-02,  1.2886e-01,  1.1171e-01,  9.3988e-02,
         5.8789e-02,  9.3626e-02,  6.5050e-02, -2.8707e-02,  1.0554e-01,
         2.1246e-01, -7.9388e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6875,  7.3485, -0.0120, -0.2937, -0.0876, -0.0714, -0.2411, -0.2121,
        -0.0973, -0.0476,  0.1409,  0.1227, -0.2535, -0.0698, -0.0112, -0.0289,
        -0.0204, -0.1082, -0.0558, -0.1522,  0.0623,  0.0083, -0.1421, -0.5084,
        -0.6177,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3730e-01,  8.0090e+00,  2.8966e-01,  3.2541e-03, -9.5287e-02,
        -8.1566e-02, -2.4733e-01, -2.9059e-02, -9.6923e-02,  1.2518e-03,
         3.5809e-02,  4.4055e-03,  3.5439e-02, -9.8750e-02, -3.1662e-01,
        -1.5018e-01, -7.7899e-02, -8.5525e-02, -7.4839e-02, -1.7449e-02,
         4.2193e-02,  3.5947e-02, -9.3847e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1505e+00,  2.4287e+01,  2.7329e+00, -1.0282e-01, -2.3587e-01,
         1.2829e-01,  1.1881e-01, -1.1350e-01,  8.3411e-02,  5.8359e-01,
         3.8657e-01,  4.0061e-01,  3.9863e-01,  9.5487e-02,  2.3744e-01,
         2.2752e-02, -9.3658e-02,  4.5588e-01,  5.6997e-02, -4.0300e-01,
         1.9546e-01,  1.0833e+00,  1.0478e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8468, -7.2632,  0.2291, -0.1664,  0.0476,  0.0272,  0.6423, -0.7328,
         0.0737, -0.2356,  0.3088,  1.1302,  0.3181,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.9646, -29.6998,  -1.2713,   1.5904,   0.5803,   0.1614,   0.2699,
          0.4079,   0.6642,  -0.6398,   0.2786,  -0.0957,   0.3510,   1.2894,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.0346, -20.6048,  -1.7863,   0.5966,   0.1674,  -0.0237,   0.4033,
          0.4112,   0.4384,   0.3681,   0.5479,   0.5776,   0.1645,  -0.1097,
          0.0349,   0.2578,   0.6506,   0.3367,   1.7473,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.5472e-01,  6.6297e+00,  5.9864e-02, -4.4878e-01, -1.8855e-01,
         7.2241e-02, -2.3804e-02,  4.4452e-01, -1.7218e-01,  1.1753e-01,
        -7.5294e-02, -1.3422e-03, -1.1863e-02, -6.6818e-02, -4.6792e-03,
         3.6973e-02,  9.1754e-02,  5.6250e-02,  7.4264e-02,  5.9176e-02,
        -8.9162e-02,  4.3956e-02, -8.5776e-02,  6.7624e-02, -3.6671e-01,
        -7.0836e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.3214, -26.4328,  -2.3707,  -0.6850,   0.0355,   0.3021,   0.3582,
          1.2110,   0.2276,   0.5611,   0.2891,   0.1324,   0.5318,  -0.2959,
         -0.4882,   0.3237,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.2265, -10.5515,  -0.7553,  -0.6469,  -0.0729,  -1.2134,  -0.0500,
         -0.9063,   0.3626,  -0.1273,   0.0250,  -0.2624,  -0.1333,   0.0671,
         -0.5220,  -1.0804,  -0.9821,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2840, 39.7176, -4.5450, -8.6712,  1.6423,  1.1354, -0.7457, -0.5448,
         1.2890,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1435e+00, -1.1940e+01, -2.1780e+00,  1.3358e+00, -5.0754e-01,
        -3.6003e-02, -4.8912e-02,  5.0626e-02, -2.5123e-01, -1.7078e-01,
        -1.7726e-01, -3.2212e-02,  1.5343e-01,  4.5230e-03,  1.2989e-02,
        -1.2864e-02, -6.0378e-02,  4.1291e-02, -5.6037e-02,  3.6519e-01,
         9.0333e-02,  2.1845e-01,  1.9002e-01,  3.9991e-01,  2.4523e-01,
         2.8499e-01,  1.2032e-03,  1.9756e-01, -1.0632e-01,  5.9564e-02,
        -8.2260e-02, -1.6312e-01, -9.0462e-02,  5.1615e-02, -4.6844e-01,
        -3.0491e-01,  1.5602e-01, -8.1267e-02,  2.5737e-02,  1.5425e-01,
         2.7612e-01, -1.4187e-01,  3.3949e-01,  3.1938e-01,  1.2864e-02,
        -1.2122e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4970e+00,  1.3835e+01, -1.2431e+00,  6.2851e-01, -2.0060e-01,
        -6.9451e-02,  2.4360e-01,  3.3715e-01, -6.4725e-03,  1.7159e-01,
        -3.9262e-02,  1.7119e-01, -2.8633e-01,  4.5264e-02,  1.4099e-01,
         2.2377e-01,  2.2334e-01,  4.7706e-01, -7.4364e-02,  1.7798e-01,
         8.0012e-02, -4.5975e-02, -1.9261e-01, -1.1878e-01, -2.2779e-02,
         3.9224e-02,  3.1086e-01, -2.4276e-01, -1.3126e+00,  8.1512e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.2485, -15.8384,  -0.9335,  -1.7759,   0.5300,  -0.5958,   0.9789,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1507, -8.3800,  0.5563, -0.1392,  0.0541, -0.2008, -0.0848, -0.6447,
         0.0926, -0.1483, -0.0504,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.2308, -28.4514,  -1.9571,  -0.7842,  -0.3766,  -0.7712,   0.2009,
          0.0879,   0.1369,   0.5729,   0.2015,  -0.7646,  -0.1547,   0.1055,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2102e+00, -2.2229e+01, -1.2995e+00, -7.6586e-01, -6.8848e-01,
        -1.9481e-01,  1.0598e-01,  3.2272e-01,  2.0780e-01,  3.2132e-01,
         1.5358e-01, -9.8383e-02,  4.2661e-01,  5.3417e-01, -1.8758e-02,
         3.0271e-01, -1.1304e-01, -2.7321e-01,  1.8917e-01, -2.2156e-03,
        -1.3610e-01, -9.4923e-02, -2.3867e-01, -1.1176e-02,  2.9404e-02,
        -8.6827e-02,  2.5744e-01, -1.9107e-01, -7.2389e-02,  3.9658e-01,
         8.9061e-01,  1.8023e-01,  3.1527e-01, -7.3040e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1275e+00,  1.1850e+01,  1.7687e+00,  3.3741e-01,  2.7607e-01,
         4.1119e-01,  6.9055e-02, -1.2429e-02,  1.6533e-01, -2.2954e-01,
         1.0176e-01,  1.2460e-03, -2.1335e-01,  1.4467e-01, -4.1342e-01,
        -3.3013e-01, -8.0039e-02, -5.6539e-01, -2.5501e-01, -1.2815e-01,
        -3.0069e-01,  5.0190e-02,  1.9789e-02,  4.0789e-01,  4.3800e-01,
        -9.5108e-02,  1.5029e+00, -2.0423e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1352,  6.5123,  0.3628,  0.2195,  0.0702, -0.0328,  0.0361,  0.0300,
        -0.0288,  0.0401,  0.0836, -0.1387,  0.3497,  0.5312,  0.1178, -0.1305,
         0.0522,  0.0226, -0.0932, -0.0555, -0.0894,  0.1353, -0.1652, -0.3302,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 3.8486, 20.3697, -0.8896,  1.0137,  0.1292,  0.1017,  0.4779, -0.5550,
        -0.1077, -0.1103, -0.2767,  0.2013,  0.6874,  0.5853,  0.0271,  0.3599,
        -0.1676, -0.2563, -0.0758, -0.0489, -0.1580, -0.2992, -0.0649, -0.0698,
        -0.1212,  0.8915, -0.0530, -0.0601,  0.4310, -0.9417, -0.6114,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0655e-02,  1.7079e+01, -1.9770e-02, -1.4698e+00, -1.6963e-01,
         1.1786e+00,  3.7198e-03,  2.4240e-01,  1.7470e-01,  2.3312e-01,
         3.3686e-01,  3.3359e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5439e-01,  1.7298e+01,  6.3982e-01, -1.2150e+00,  1.0209e-01,
        -7.8124e-02, -1.2033e-01,  2.0448e-02,  3.4292e-01, -8.1633e-01,
         1.0958e-01, -4.5377e-01,  2.5449e-01, -2.5134e-01, -1.3761e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9059e+00,  2.7477e+01,  2.7065e+00, -6.6362e-01,  1.1640e+00,
        -5.7423e-01, -6.1322e-01, -5.4995e-02, -1.5913e-01, -7.1290e-01,
        -3.9377e-01, -1.3992e-01,  6.8116e-02,  1.7296e-01, -1.3655e-02,
        -3.8205e-01, -1.2328e-02, -3.5912e-01, -4.1797e-02,  1.1623e-01,
        -2.5901e-01, -4.1889e-02, -2.2057e-01, -5.1183e-02,  1.9269e-01,
         6.2795e-02,  1.5336e-01, -3.4460e-01, -5.6642e-02, -5.7841e-01,
        -6.2490e-02, -2.6620e-01, -1.7134e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8618e+00,  4.0316e+01,  4.7132e+00, -5.0480e-01,  6.1410e-01,
         1.1188e-01, -1.0376e+00, -6.9989e-01, -4.3228e-01, -1.3949e-02,
        -1.3475e-01, -7.6700e-01, -4.2569e-02,  2.1300e+00, -4.9587e-01,
        -2.0275e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.2924, -19.0899,   6.5956,   0.4441,  -0.3757,  -0.0229,  -2.4924,
          0.0237,  -0.4454,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7161,  8.0815, -0.8745,  1.0083,  0.3007,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0168e+00,  1.6917e+01,  3.7500e-01,  3.1580e-01, -2.0611e-01,
        -4.2996e-01, -4.4456e-01, -1.9380e-01,  2.0287e-01, -1.4602e-01,
        -1.1032e-01,  1.8591e-01,  1.3698e-01, -1.2051e+00,  1.6743e-01,
         9.8586e-02,  3.9636e-02,  8.1127e-02,  2.2612e-02, -2.9562e-03,
         1.2160e-01, -1.4910e-01,  8.0925e-02,  3.2609e-01,  3.4632e-02,
         4.1854e-01,  8.7296e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.0991, -15.4602,  -1.5750,  -0.8112,  -0.9614,  -1.9336,   0.2505,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5742e+00,  4.2013e+01, -1.2403e+00,  5.9013e-01,  1.8580e+00,
         5.4585e-01,  2.7056e+00,  6.5364e-03,  1.6805e-01, -1.9727e+00,
         5.9168e-02,  2.3281e-02,  2.2393e-01, -7.2752e-01, -3.0855e-01,
         5.7213e-01, -1.0842e+00,  5.1225e-01,  5.8837e-01,  1.5802e+00,
        -5.5485e-01,  1.1872e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0630, 29.7981,  0.5269,  1.0100,  0.8891,  0.4379,  0.1257, -0.1338,
        -0.1773, -0.7152,  0.9227,  0.2858,  0.1914,  0.3785, -0.3746,  0.3034,
        -0.0774, -0.6895, -0.5605,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1037e-01,  1.0922e+01,  1.4372e-01, -7.0326e-03,  2.9628e-01,
         2.3949e-01,  1.0642e-01,  3.9031e-01,  3.1870e-01, -5.8046e-02,
         4.5937e-02, -7.5549e-02, -8.2434e-02,  2.0854e-01,  1.3356e-01,
        -5.7618e-02, -7.0333e-02,  6.5544e-02, -1.0227e-01,  3.5885e-02,
         1.3454e-01, -1.3021e-01, -5.5601e-02,  2.5789e-01,  4.5164e-02,
         2.7541e-02, -9.4795e-02,  1.2042e-01, -1.2479e-01, -1.9936e-01,
         1.9848e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 5.9296e-01, -2.6393e+01, -4.9809e+00,  1.7637e+00, -1.2199e+00,
         3.3286e-01, -3.1014e-01,  4.9691e-01,  9.3451e-02,  2.8346e-01,
        -1.3329e-01,  1.0664e-02, -3.4536e-02, -3.9804e-02, -1.0116e+00,
         1.5164e-01, -1.8143e-01, -1.1179e+00,  9.1358e-01, -9.8203e-02,
         1.9415e-01,  2.9101e-01, -1.1093e-01,  1.9278e-01, -1.1423e+00,
         5.7841e-02,  2.3546e-01,  6.2348e-01, -2.2254e-01,  6.1879e-01,
        -5.0213e-02,  1.4441e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8625, 22.3733,  0.5832,  0.0513, -1.4405,  0.2352,  1.0016,  0.0611,
         1.3478,  0.0548, -0.7575,  0.8537,  0.2626, -0.0956,  0.1746, -0.3705,
         0.0409,  0.0928,  0.1615, -0.0922,  0.1238,  0.8761,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3073e+00,  1.3970e+01,  1.0796e+00,  1.6220e-01, -4.1157e-02,
         1.2577e-01,  1.8749e-01,  2.4262e-01,  1.5460e-02, -1.5224e-01,
        -2.4655e-01, -1.6948e-01, -9.0354e-01, -1.8510e-01, -4.5190e-01,
         9.2195e-02, -1.1740e-01,  1.2715e-01, -1.6766e-01,  4.3789e-02,
        -2.4710e-01, -1.9031e-01,  1.2561e-02, -1.6378e-01, -1.4558e-01,
         1.9040e-01, -1.7673e-01, -6.7646e-01,  6.4641e-02, -5.0694e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0754, 12.0178,  0.6055, -0.2346, -0.0748,  0.2568, -0.4236, -0.0529,
        -0.2118, -0.1064, -0.1607,  0.2536, -0.5793,  0.1793, -0.1432,  0.1525,
         0.0778, -0.0993,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2708e+00,  3.0003e+01,  1.8752e+00,  6.1892e-01,  4.6374e-01,
        -3.0914e-02,  9.1608e-02, -8.1349e-02,  3.6790e-03, -1.1237e-01,
        -5.8243e-02, -1.2675e-01, -1.4239e-01,  5.6106e-01, -3.0961e-01,
        -6.5224e-01,  1.4471e-01, -5.8932e-01,  1.0046e-01, -3.6691e-01,
        -2.0944e-01,  3.2487e-02, -3.5296e-01, -1.9447e-01,  1.3574e-01,
        -6.2523e-02,  1.3337e-01, -7.0323e-02, -1.7059e-01,  2.2442e-02,
        -9.2862e-02, -6.3893e-02,  1.0315e-01, -4.4972e-01,  2.3783e-01,
        -1.3575e+00,  4.4343e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9677, 15.7481, -2.6389, -1.5397, -0.7882,  0.3422, -0.4820,  0.1778,
         0.1973,  0.1674, -0.2601,  0.8857, -0.4142,  0.0580, -0.2001,  0.0164,
        -0.0382, -0.0920,  0.1596, -0.1469, -0.1392,  0.0374, -1.3693, -0.0948,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7540e-01, -2.6254e+01, -2.8507e-01, -1.0999e+00, -7.4807e-01,
        -3.1747e-02, -3.0935e-01, -1.3714e-01,  9.0858e-02,  1.9687e-01,
         3.9707e-01, -2.3654e-01,  3.5628e-01,  3.4512e-01,  3.2309e-02,
         2.3871e-01,  3.3434e-02, -1.9340e-02, -4.5367e-02,  3.7310e-02,
         6.9761e-03,  3.4980e-02, -1.1204e-01,  2.4132e-01, -2.5789e-01,
        -5.7704e-01, -2.5191e-01, -1.6685e-01, -2.4938e-01,  2.1078e-01,
        -4.0777e-02, -5.2347e-02, -2.5155e-01, -8.9167e-02, -2.5426e-01,
         1.5361e-01,  5.4059e-01,  3.3659e-01, -1.5323e-01,  3.1365e-02,
         5.1903e-02,  1.8743e-01,  2.7559e-02,  9.2910e-02, -2.1446e-02,
        -7.6433e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7829, 29.1650, -1.4078,  0.0825, -0.5966, -0.3095, -0.2227, -0.2656,
         0.2738, -0.7205,  0.3749,  0.3158,  1.1508, -0.0562, -0.3906,  0.2825,
         1.1152, -0.4215,  0.0800, -0.1413,  0.7016, -2.9763,  0.8244,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.5839, -31.8852,   0.2416,  -2.8475,   0.9542,  -1.2142,   0.7011,
          1.3444,   0.0401,   0.7459,  -0.6626,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1236, 24.6940,  2.1984, -1.9121, -0.8919, -1.1685,  0.9427, -1.0017,
         0.3618,  0.3117, -0.0404, -0.0590, -1.1112,  0.7852,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5960, 21.0066,  0.8377, -0.9327,  0.3300, -0.3262,  0.4237,  0.8718,
        -0.2239,  0.9734,  1.7844,  0.6334,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9618e+00, -3.5122e+01, -4.8394e+00, -1.8022e+00, -7.6358e-02,
         1.2125e+00,  1.5414e-01, -4.3628e-01, -2.0966e-01,  3.0010e-01,
         4.2230e-01,  1.7541e-01, -8.4713e-03, -2.2119e-01,  1.9993e-01,
         3.7842e-01,  5.1012e-02,  2.9443e-01, -3.4217e-02,  1.0463e+00,
         7.1914e-04, -1.3075e-01, -1.7470e-01, -3.7935e-01, -1.2115e-01,
         4.1850e-01, -1.1476e+00,  3.9770e-01, -5.7348e-02,  1.4318e-01,
         2.4153e-01, -3.3634e-01,  2.1804e-01, -6.2543e-01,  3.5051e-02,
         8.7488e-02, -4.8129e-02, -2.6611e-02,  2.2577e-01,  1.8517e-01,
        -7.6888e-03,  7.4299e-02, -1.6797e-01, -4.8153e-02,  1.5009e-01,
         1.3893e-01, -3.5705e-02,  5.2525e-02,  1.5073e-01,  2.0676e-01,
        -4.3278e-02,  4.4220e-02, -1.7963e-02,  3.2452e-01,  1.7985e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 3.9919e+00, -2.8665e+01, -1.1180e+00, -1.8283e+00, -1.1274e+00,
        -1.1043e+00,  8.0873e-03,  7.4952e-03,  8.9968e-01, -1.4688e-01,
         3.7235e-01, -1.0446e+00, -4.9418e-01, -2.7514e-01, -8.1334e-01,
        -5.5482e-01,  5.0429e-01, -6.5505e-01,  1.3689e-01, -6.4231e-01,
         2.6945e-01, -5.7190e-01,  7.9909e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8125e-01,  2.0606e+01,  4.3583e+00,  2.8327e+00,  3.6384e-01,
         4.5365e-01,  3.7585e-01,  6.6291e-01, -4.9618e-01, -7.5206e-02,
        -5.8009e-01,  3.3234e-01,  1.1446e-01, -7.0496e-01,  3.2598e-02,
         1.9544e-02, -8.2088e-01,  4.6382e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1197,  9.6530,  0.3618, -0.3448,  0.2792, -0.1609,  0.0838,  0.4054,
        -0.2492, -0.0596,  0.1404,  0.3047,  0.5960,  0.0501,  0.0627, -0.1038,
         0.0881, -0.5417,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8613, 45.1072, -1.8607, -2.3567, -0.9571,  0.1329, -0.3045, -4.1201,
        -1.2535, -0.6661, -0.7603, -0.9968, -0.2543, -1.2366, -0.8053,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0332e-01,  2.2662e+01,  1.7972e+00,  9.5841e-01,  9.6410e-02,
         6.8371e-01,  6.3157e-01,  2.7156e-01,  7.4857e-01,  3.2328e-01,
         3.1505e-01, -2.6432e-01,  2.5658e-01,  4.2177e-02, -3.4684e-01,
        -3.3699e-01,  2.2953e-01,  5.7990e-01,  5.2500e-03, -3.8354e-01,
         2.1067e-01, -6.6043e-01, -1.0091e-01,  6.8272e-02,  1.0507e-01,
        -2.1315e-01,  4.0254e-01, -3.2439e-02, -1.3529e-01,  4.6562e-02,
         2.8069e-01, -1.0560e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4007e-01,  2.1622e+01,  1.6774e+00,  5.9673e-01,  1.0576e+00,
         1.7934e-01, -6.7216e-02,  2.6881e-02, -2.4575e-01, -1.4350e-01,
        -3.2383e-01, -3.0281e-01, -6.0607e-02, -7.9421e-01, -1.4552e-01,
        -1.4104e-02, -1.6307e-01,  7.3272e-02, -4.9572e-01, -4.1395e-01,
        -2.6625e-01, -4.8644e-02, -6.4995e-01, -2.7337e-01,  5.3703e-02,
         2.2545e-01,  1.2794e-01,  3.2649e-02,  3.8376e-03, -9.3823e-02,
         2.0310e-01,  6.7853e-03,  4.9267e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2163e+00, -1.1863e+01, -5.6879e-01, -7.8251e-01, -3.9523e-01,
        -1.5279e-01, -2.1820e-01, -1.1582e-01,  4.7404e-02,  2.9487e-02,
        -1.9028e-02, -5.0885e-02,  4.1814e-01,  2.9251e-01,  2.6936e-01,
         3.8598e-04,  6.8802e-02,  1.0407e-01, -1.2840e-01,  3.4535e-02,
         6.7882e-02,  9.0914e-02,  7.4150e-02, -2.2507e-01, -8.3765e-02,
         1.3428e-01,  4.1067e-02, -7.1904e-02, -3.1136e-02, -5.1568e-02,
        -1.4015e-01, -8.7985e-02,  3.4779e-02, -5.3444e-02,  7.1494e-02,
         5.5742e-02,  1.1711e-02,  3.1990e-02, -9.0757e-03, -6.8715e-02,
         3.6866e-03, -4.3425e-02, -1.6699e-03,  1.0999e-01, -9.4122e-03,
        -1.0207e-01, -7.1268e-02, -3.4169e-02, -1.3866e-01,  6.3840e-02,
         3.6518e-02, -1.0298e-01, -6.8470e-02,  4.0510e-02, -1.0603e-01,
        -9.1841e-02,  1.3389e-01,  3.1989e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4036e+00,  1.7011e+01, -1.4331e-01, -5.7858e-02, -2.1816e-02,
         2.7417e-01,  2.3573e-01,  1.4529e-01,  6.5614e-02, -8.9184e-02,
        -1.2787e-01,  7.1173e-02, -3.3256e-02, -2.2749e-01,  4.2101e-02,
        -9.6291e-02, -9.7476e-02, -4.7796e-02, -1.2442e-01, -4.7579e-01,
        -1.8119e-01, -3.8554e-03, -1.2171e-01, -4.9605e-02, -4.0620e-01,
        -1.8985e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8449, 21.8160,  0.6444, -0.2797, -0.4160,  0.7232,  0.1551, -0.4164,
         0.2766, -0.2922, -0.0347, -0.2942,  0.1203, -0.0761,  0.0732,  0.1067,
         0.3347,  0.2568,  0.1563,  0.2517, -0.0728, -0.4724, -0.1282, -0.6919,
         1.0644,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1724e-02,  2.6932e+00,  1.6876e-01,  7.6325e-02,  2.4208e-02,
        -2.6878e-02, -8.3915e-02, -3.7307e-02,  1.4310e-02, -1.3252e-01,
        -6.4678e-02, -3.1740e-02,  7.6711e-02,  4.0071e-02, -1.0664e-02,
        -6.8787e-02, -3.1812e-02,  4.7222e-02,  6.7761e-04,  6.3711e-03,
         3.6990e-02, -3.2466e-02, -5.1353e-03,  4.3421e-02, -4.4779e-02,
        -5.1178e-02,  6.2475e-02,  6.8512e-03,  3.9930e-02, -5.7424e-02,
         8.7125e-03,  9.8418e-04, -2.2043e-02, -5.1722e-03, -3.4874e-02,
         2.1851e-02,  6.3985e-02, -4.8709e-02,  3.8136e-03,  4.6821e-02,
        -3.7092e-03, -1.6885e-02,  2.9529e-04,  8.9761e-02, -8.9227e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4285e-01,  1.3261e+01, -2.2228e-01,  6.9296e-01,  8.1812e-02,
         2.5729e-01,  1.2801e-01,  3.0457e-01, -7.4231e-01, -2.7693e-01,
        -1.5546e-01,  1.4433e-01, -2.8507e-01, -1.2444e-01, -1.2167e-01,
         1.6884e-01, -3.2497e-01, -8.0343e-03,  1.0949e-01, -5.2767e-02,
         1.2995e-01, -8.5167e-01, -4.6210e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6122,  5.9142, -0.4884,  0.0164,  0.0578,  0.0700, -0.0117, -0.0504,
         0.0610, -0.1132, -0.2108,  0.3328,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-3.4882e+01,  8.7385e+00, -1.9734e-02,  2.4278e+00, -8.3699e-01,
         1.5999e+00, -4.7991e+00, -3.5156e+00,  1.6079e+00,  1.2436e+00,
         1.4563e+00, -3.4767e-01,  1.4868e+00,  1.2720e+00, -4.0124e-02,
         1.3879e+01,  1.6855e+00, -4.9552e+00,  3.9952e-01,  5.9830e-01,
        -2.3714e+00,  3.2456e+01,  1.0938e+01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2978e+00, -2.8218e+01,  1.7213e+00,  5.1262e-01, -2.0181e-01,
         3.7224e-01,  7.7469e-02, -4.3429e-01,  2.0411e-01, -5.0858e-01,
         4.3775e-01,  2.5384e-01, -2.5172e-01, -3.4640e-01,  9.1781e-01,
         2.4223e-01,  3.1028e-01,  9.4070e-01,  2.2857e-02, -3.8643e-01,
        -3.8351e-01,  1.9051e-02,  3.4005e-01,  3.3867e-01, -1.6701e-01,
        -8.7746e-02, -2.7072e-01, -1.9303e-01,  3.7713e-01, -1.1398e-02,
        -1.3279e-01,  4.2759e-01, -1.5345e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.1847, -33.3214,  -0.7001,   0.4140,   0.2345,   0.4097,   1.1093,
         -0.6177,  -0.8359,  -0.0898,   0.6909,   0.2113,   0.0769,  -0.3804,
         -0.4733,   0.6130,  -0.1923,   1.1862,   1.0032,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1952, 13.2878, -1.0635,  0.2382, -0.6463,  0.8957, -0.1259, -0.2177,
         0.0791, -0.2562,  0.1667, -0.0750,  0.2065,  0.0751,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.4168, -32.5972,  -3.3006,  -0.8956,   1.5432,   0.5724,   0.4132,
         -0.3489,  -0.2297,  -0.1626,   0.0864,  -0.0338,  -0.2932,  -0.5562,
          0.2619,   0.2237,  -0.8746,  -0.1350,   0.0957,  -0.5293,   1.3103,
          0.7012,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5814e+00, -2.8266e+01, -1.8772e+00, -2.2687e+00, -1.7516e+00,
        -1.4248e-01,  5.1261e-02,  1.7469e+00, -8.3329e-02, -5.2014e-01,
         5.6537e-01, -3.7424e-01,  1.0456e+00,  1.8167e-01,  2.6773e-02,
        -1.5489e-02,  3.5404e-01, -6.5888e-01, -2.7955e-01,  1.7064e-01,
        -1.3589e-01, -5.5394e-02,  8.6211e-02,  9.2381e-02,  2.4802e-01,
        -6.9967e-04, -2.0647e-01,  4.7298e-01, -3.6271e-01, -6.8616e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2758e-02,  6.6940e+00,  2.6084e-01, -8.6641e-01, -4.9535e-01,
         5.4366e-01,  1.0839e-01, -1.8351e-01, -2.7336e-01, -7.2276e-02,
        -6.4007e-02, -3.8101e-02, -1.3236e-02, -4.5089e-03,  1.0905e-01,
        -4.1296e-01, -3.3148e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.9714, -32.0074,  -1.3219,  -2.2685,   1.4013,  -0.5743,   0.4293,
         -0.9320,  -2.0728,   0.3360,   3.3229,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1886e+00,  3.4882e+01,  4.1980e+00,  6.6333e-01, -4.3938e-01,
        -1.4657e+00, -4.0042e-01, -7.8257e-01,  9.4425e-01,  9.9630e-02,
        -1.3560e-01, -2.4228e-01,  1.4720e-01, -7.5366e-02,  2.1483e+00,
         4.4186e-02,  1.1233e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3482e-02,  8.4046e+00,  4.1284e-01,  1.2497e-01,  5.9575e-01,
         1.4668e-01,  1.3365e-01,  7.7877e-02, -3.9626e-02, -8.7499e-02,
        -1.6561e-02, -2.0833e-02, -2.9575e-02,  2.6521e-02, -2.8661e-01,
         4.1670e-02, -2.6724e-03, -6.1282e-02,  4.1764e-02,  3.4402e-02,
         8.1871e-03, -3.8000e-02,  1.2128e-01,  7.8138e-02,  2.3924e-01,
        -3.9774e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9836e+00,  1.4627e+01,  1.4467e-01, -1.7377e+00, -8.5870e-01,
         3.8624e-01, -6.0921e-01, -2.0821e-01, -2.5777e-01, -5.2776e-02,
         2.1342e-01, -2.6919e-01, -2.6548e-01, -1.3259e-01, -7.0547e-02,
        -1.6880e-01, -4.2314e-02, -6.2515e-02,  1.5995e-02, -2.0539e-01,
        -6.8467e-02, -8.5176e-02, -2.3584e-01,  1.5151e-01, -3.1935e-01,
        -8.2332e-02, -1.2216e-01, -1.4321e-01, -1.2845e+00,  2.4357e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.2954, -22.9369,   0.2479,   0.6818,   0.5947,   0.1331,   0.2401,
          0.1870,  -0.3582,  -0.2518,  -0.4416,   0.6576,  -0.1540,   0.1981,
         -0.0857,  -0.1948,   0.1323,   0.5077,  -0.2936,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-6.8611e-01,  1.6303e+01,  7.4578e-01,  2.1713e-01,  5.8227e-01,
         1.1638e-01,  7.2659e-02,  7.9687e-02,  5.3211e-01,  8.7069e-02,
        -2.6174e-02, -2.2794e-01, -9.9915e-02,  1.3939e-01, -1.1993e-01,
        -4.4509e-01,  2.8534e-01,  4.8372e-01,  8.9209e-02,  3.1799e-01,
        -1.5571e-02, -2.8908e-01,  1.8069e-01, -1.9431e-01, -2.9480e-01,
        -5.3614e-01,  2.6748e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6088e+00, -1.8389e+01, -5.1314e-02,  1.0701e+00, -1.1797e+00,
        -7.7710e-01, -5.4514e-01, -3.6130e-01,  1.1908e+00,  3.9663e-01,
         1.2076e-02, -2.7605e-02, -1.1119e+00,  4.1852e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6160, 24.1237,  1.3763,  0.9790,  0.1676,  0.9115, -0.2065, -0.3893,
         1.2693,  0.2017,  0.0828, -0.2648, -0.0815,  0.4896,  0.1326, -0.1744,
         0.1644,  0.2286, -0.0713,  0.1548, -0.3023, -0.2797, -0.0978, -0.1535,
         1.2125, -0.1413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4204e+00,  8.2703e+00, -2.4933e-02, -3.1528e-01, -5.2732e-02,
        -1.1607e-01, -8.2249e-02, -1.7443e-01, -2.3700e-01,  1.0483e+00,
        -3.5613e-03, -6.2870e-01, -5.3637e-01,  4.3438e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0658e-01,  1.6536e+01,  5.3738e-01, -1.0098e-01,  4.1479e-02,
        -5.1481e-02,  4.1662e-02, -3.1350e-01, -6.0753e-02, -8.6416e-02,
        -2.3196e-01, -2.1284e-01, -8.4028e-02,  6.7297e-02,  1.3948e-01,
        -2.6360e-02, -6.9168e-02, -2.1534e-02,  1.3088e-02, -2.6548e-01,
        -5.2218e-02,  1.3006e-01, -1.7974e-02,  3.5688e-02,  1.2439e-01,
        -1.5707e-01, -1.6700e-01, -2.1878e-02,  1.1242e-01,  2.3238e-02,
         4.4926e-01,  5.0178e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2641e+00, -2.6579e+01, -1.0180e+00,  1.0838e+00,  3.4730e-01,
        -4.4222e-01,  9.0310e-01,  2.8253e-01, -1.7731e-01, -6.4187e-04,
        -5.1858e-01, -3.8417e-02, -2.0156e-01, -9.6659e-02, -9.1431e-02,
         4.9745e-02,  5.1344e-02,  5.4621e-01, -8.7429e-02, -5.0265e-02,
        -5.6256e-02, -1.4568e-01,  2.1158e-01, -1.0536e+00, -4.0954e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3984, 15.3978, -0.1214, -0.3529, -0.9667, -0.3926,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2335e+00,  2.8171e+01,  2.5529e+00, -4.1951e-01,  9.0903e-01,
         2.9129e-01,  4.4248e-01, -6.3969e-01, -1.6974e-01, -1.0882e-01,
         3.3432e-03, -8.0216e-02,  1.7535e-01,  1.6058e+00, -1.3876e-01,
        -1.4338e-01, -1.1891e-01,  2.1718e-01, -6.7627e-03, -2.1987e-01,
        -3.4698e-01, -3.7461e-01,  1.3508e-01, -1.1916e-01, -3.1275e-01,
        -1.0933e-01,  1.0488e-01,  4.2878e-01, -5.8854e-02, -3.9411e-02,
        -4.8612e-02,  1.4954e-01, -1.0967e-01, -1.6114e-01, -1.2762e-01,
         1.2178e-01,  6.8965e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4370, -7.6204, -0.1910, -0.5080,  0.1291, -0.1341, -0.3193, -0.0613,
        -0.0652, -0.0353,  0.0511, -0.0662,  0.0426,  0.1669, -0.0600,  0.0866,
         0.0576, -0.1599, -0.0848,  0.0599,  0.1371,  0.1007,  0.0836,  0.0629,
         0.0919, -0.0712, -0.0535, -0.0865,  0.0884,  0.0334,  0.0362,  0.0111,
        -0.0542, -0.1546,  0.2560,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9205, 16.3560, -5.5087, -1.1206,  0.2595,  0.2837,  0.2460,  0.0705,
         0.2655,  0.3657, -0.1654, -0.4506, -1.0804,  2.3115, -0.7280,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9876e-01, -1.6145e+01, -3.7523e-01,  5.2153e-01, -3.1630e-02,
         3.5090e-01, -5.4222e-01, -6.6549e-01,  3.1837e-02,  3.3199e-03,
        -1.3256e-01,  1.6528e-02, -7.4152e-02,  1.2538e-01, -4.0092e-02,
         1.2052e-01, -6.5293e-02,  3.9251e-01,  3.2423e-01,  4.5550e-01,
         7.9571e-02,  3.0354e-01, -8.7888e-02,  2.2365e-02,  2.3255e-01,
         3.5386e-01,  1.5315e-01,  1.0215e-01,  1.8236e-01, -9.2679e-02,
         6.2003e-03,  7.4442e-02, -1.7842e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8221e-01, -8.3080e+00,  9.4715e-01,  3.9940e-02,  5.5130e-01,
        -3.5884e-02,  1.8354e-01, -1.8422e-01,  1.3649e-02,  7.7695e-02,
        -8.6504e-02, -8.6140e-02,  1.4582e-01,  3.0831e-02,  1.6004e-01,
         3.6348e-02, -1.0849e-01, -1.3560e-01,  2.6174e-01,  7.1020e-03,
         3.0883e-02, -2.4772e-03, -1.3923e-02,  4.1052e-02, -1.8766e-02,
        -1.3922e-01,  2.1774e-01,  1.4974e-01,  8.5740e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.1756,  5.7093,  0.1987, -0.4670,  0.1313, -0.0073, -0.1874,  0.2316,
         0.0666, -0.4406, -0.0577, -0.0319,  0.0139,  0.5049, -0.7093,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9708e-01,  2.2386e+01,  4.6999e-01,  2.7519e-01, -2.6151e-01,
        -3.4680e-01, -4.9071e-01, -1.2507e-01,  3.5714e-02, -4.7911e-01,
        -1.1122e-02, -1.1356e-01, -4.6270e-02,  2.9195e-02, -7.4047e-02,
        -1.2456e-01, -2.6447e-01, -1.5556e-01,  3.4328e-03, -1.1512e-01,
        -1.7348e-01, -1.7726e-01, -2.1498e-01,  1.3061e-01, -9.6937e-02,
        -9.4771e-02, -1.3077e-01,  2.3249e-01, -4.1644e-01, -5.9444e-02,
         3.8015e-03, -1.9947e-01, -1.5188e-01, -1.3262e-01, -1.8172e-01,
        -2.6283e-01, -9.3893e-02,  7.3860e-02,  1.6641e-02, -1.0264e-01,
        -3.9966e-01, -6.5382e-02,  4.7080e-02, -2.0637e-01, -7.2681e-02,
         1.4454e-01,  7.8186e-02, -3.9263e-01,  1.4802e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5916e-01,  9.3132e+00,  3.3540e-01,  4.3794e-02,  3.8938e-02,
        -3.2027e-01,  3.8558e-02, -4.0578e-02, -7.0860e-02, -3.6518e-02,
        -1.5432e-01, -3.2128e-02,  2.0698e-02, -1.2303e-02, -1.1067e-01,
        -3.9603e-02, -1.8731e-01, -3.2110e-02,  1.2232e-01, -1.0088e-01,
        -8.0341e-02,  1.7254e-03, -9.8924e-02, -3.1561e-03, -8.9749e-02,
        -2.6568e-02, -2.1067e-01,  1.5117e-01,  6.7457e-02,  9.7224e-02,
        -2.0547e-02, -2.7901e-02, -6.9617e-02, -4.5537e-02, -3.7657e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7026e+00, -1.7890e+01, -1.7321e+00, -8.4292e-01, -2.6663e-01,
        -6.5690e-01, -1.6653e-01, -2.0915e-01, -1.0414e-01,  1.4284e-01,
         1.2119e-01, -8.4749e-02,  4.3339e-01,  1.7204e-01, -3.5485e-01,
        -1.3313e-02,  1.5943e-01, -8.1263e-01, -1.1204e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7931, 24.7827, -1.6200,  0.7610,  0.9906,  0.1147,  1.2739, -0.1873,
         0.0297,  0.2925, -0.6877, -0.8878, -0.6485, -2.5112,  0.1985,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4504e-01,  9.3003e+00, -1.5704e-01,  3.6831e-03, -1.1624e-01,
        -1.6961e-02, -4.0979e-01, -2.0530e-01, -2.5620e-01, -1.4974e-01,
        -2.6083e-01, -1.5318e-01, -2.8190e-01, -1.4011e-01,  1.5009e-01,
         1.4166e-01, -6.9016e-02, -1.7395e-02,  1.9898e-01,  3.7208e-01,
        -9.1691e-01, -7.4287e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3220, 11.7599,  0.6155, -0.4894,  0.0428,  0.0676, -0.8040,  0.0605,
        -0.1261, -0.1454, -0.4055, -0.0679, -0.4192, -0.1881, -0.1400, -0.2118,
        -0.0444, -0.0578, -0.1546, -0.0901,  0.0531,  0.2099, -0.0987,  0.2298,
        -0.5690, -0.0849,  0.1042,  0.2143, -0.3793,  0.0503,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.6794, -20.4428,   2.1565,  -0.0808,   1.6201,   0.2689,   1.5088,
         -1.1148,  -2.5542,  -0.2090,  -0.7037,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0589e+00,  3.6586e+01,  1.9628e+00,  3.1307e-01,  7.5285e-01,
        -1.1411e-01,  4.2781e-01,  7.5801e-02,  5.5648e-01,  8.5597e-02,
        -2.9257e-01,  3.0333e-01, -5.0548e-03,  3.9601e-01,  1.4560e-01,
         1.4650e-01,  5.3433e-01,  3.4693e-01,  2.3744e-01,  4.2236e-01,
        -1.9343e-01,  1.8802e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0579e-01, -3.0517e+01, -3.3070e+00, -2.3446e+00, -4.9393e-01,
        -1.0780e+00,  1.4243e-01,  4.5058e-01,  5.7975e-01, -2.3523e-01,
        -8.6334e-02, -3.6661e-02, -5.2360e-01,  4.1108e-01, -1.0271e-01,
         4.2560e-02,  3.1922e-02,  1.2966e-01, -4.1843e-01,  1.7664e-01,
         1.9878e-01, -2.3085e-01, -2.1666e-01, -7.3621e-02, -2.9933e-01,
         1.9046e-01,  3.5651e-01,  1.7085e-02,  1.7143e-02, -1.4616e-01,
        -3.8314e-01,  4.2923e-01, -6.7920e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2932e+00, -2.0756e+01, -2.2594e+00,  1.1092e+00, -2.6415e-01,
        -1.7970e-01,  3.0767e-01,  3.9311e-01,  3.9115e-01,  9.9664e-02,
         2.3946e-01,  7.5090e-02,  5.2522e-01,  6.7087e-01,  4.5203e-01,
        -6.0254e-01, -5.5535e-02, -1.5497e-01,  4.4401e-02,  5.4861e-02,
        -1.3074e-01, -7.0197e-02,  6.1681e-02,  3.3210e-01, -1.6597e-01,
        -1.2192e-02,  3.3440e-01, -1.5455e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4965e-02, -9.9765e+00, -1.4302e+00, -2.7262e-01, -6.3661e-02,
        -2.4714e-01,  1.0876e-01,  2.7284e-01, -6.1501e-01,  2.0624e-01,
         1.8451e-01,  4.5831e-02, -2.4660e-01,  1.6106e-01, -3.4893e-02,
        -9.6353e-02, -7.0924e-02, -5.0126e-02, -6.6565e-02, -8.4402e-02,
         1.8352e-01, -1.0007e-01,  3.3430e-01, -7.1709e-02, -8.6032e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.5095e-01,  2.7808e+01, -9.5126e-01,  2.3440e+00,  1.7431e-02,
        -1.8842e-01, -1.3072e-01, -1.9172e-01, -5.4781e-01, -6.0593e-01,
         4.6861e-02, -3.1412e-01,  1.2255e-01,  2.3920e-01,  1.2605e-02,
        -1.3011e+00,  2.2755e-02,  1.6835e-01, -2.5612e-01,  5.1474e-02,
         1.8007e-01,  6.5701e-01, -1.0712e-01,  4.2863e-01,  1.8751e+00,
        -4.1307e-01, -5.0317e-01, -6.2406e-01,  6.6392e-02, -7.2512e-01,
        -4.8223e-01, -2.9824e-01,  2.3221e-01, -4.0971e-01, -2.8637e-01,
        -8.6130e-01, -6.0022e-01, -5.9358e-01,  7.4848e-02,  2.2285e-01,
        -5.3100e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.3157, -34.5007,  -0.4638,  -0.5297,  -1.2838,  -1.1087,  -0.2895,
         -1.0902,   0.4960,   0.0930,   0.0426,   0.5632,  -0.6944,   0.0455,
         -0.1935,   0.3274,   0.1523,   0.1382,  -0.2564,  -0.2642,  -0.1103,
         -0.4135,   0.6741,   1.1277,  -0.0566,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0449e+00,  2.3070e+01, -1.2528e-01, -1.8139e+00, -1.6263e-01,
        -9.8758e-01, -6.2826e-01,  2.3608e-02, -4.9690e-01, -2.6389e-02,
         6.8217e-01, -1.8649e-01, -2.9997e-01, -3.8487e-01, -2.2893e-01,
        -3.8624e-01,  1.2259e-02, -2.1208e-01, -7.9707e-01, -7.9638e-01,
        -2.0266e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.7200, -42.6748,  -2.0697,  -0.2636,  -0.4564,  -2.2775,  -0.3732,
         -0.7099,   0.2342,   1.2794,   0.5722,   0.3056,   1.0038,  -0.1357,
          0.6019,  -0.2822,   1.9747,   0.1281,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2143e-01,  9.5684e+00, -2.6934e-01,  7.0227e-01,  3.0532e-01,
         9.5595e-02,  2.1131e-01, -1.1313e-01, -7.4852e-02, -1.3042e-02,
         1.7611e-01, -3.8509e-02, -5.6913e-03, -1.4330e-01,  1.1121e-02,
        -9.8706e-02,  1.3990e-02, -1.7101e-01,  1.5115e-01,  1.8008e-02,
        -2.3209e-02,  1.3281e-01,  6.0284e-02,  4.1867e-02,  1.7598e-02,
         5.3783e-02, -1.1214e-01,  2.0216e-02, -5.5731e-02, -3.4206e-03,
         3.9968e-02,  4.6699e-02,  2.0081e-02,  2.8965e-02,  1.0148e-01,
        -4.0508e-03, -1.4308e-02,  9.1580e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4648e+00, -1.5738e+01,  8.9247e-01,  3.6577e-01,  3.0680e-02,
        -2.5607e-02, -8.4946e-04, -3.7149e-01,  2.0816e-01,  1.0608e-01,
         9.0453e-02, -6.7270e-02, -9.1249e-02, -8.1319e-01, -4.5440e-02,
        -5.2469e-01,  2.1406e-01,  3.5906e-01,  3.5761e-01,  4.1380e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2370, 23.9260, -0.5346, -0.2775,  0.4776,  1.1700,  0.3770,  0.8725,
        -0.2508, -0.4972,  0.6363, -0.3841, -1.1570, -2.0117,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  4.0274, -23.0216,   2.0136,   2.0652,  -1.1431,   0.0726,   0.6887,
          0.6910,   0.4236,  -0.3463,   0.4982,   0.0743,  -0.1340,  -0.1981,
         -0.4749,   0.4940,  -0.4354,   0.4419,   0.1851,   0.1502,   0.3447,
          0.0291,  -0.2037,  -0.7379,  -0.0456,   0.6986,  -0.1442,  -0.1215,
         -0.0449,  -0.0232,  -0.5460,  -0.1346,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.5017, -39.4887,  -0.7815,  -0.5478,   1.0197,  -0.1566,  -0.6736,
         -0.0598,  -0.0913,   0.1738,   0.8699,   0.4492,   0.1173,  -0.7219,
          0.6202,   0.2276,   0.1692,   1.1814,   1.7698,   0.4423,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6420,  6.8311,  0.5203, -0.5287, -0.4086,  0.1467,  0.1159, -0.0894,
        -0.0837,  0.0134,  0.0826, -0.1274,  0.0550,  0.0140,  0.0709,  0.1417,
         0.0296, -0.1308, -0.0333, -0.0765,  0.0166, -0.0161,  0.0695,  0.0758,
        -0.0714, -0.0339, -0.0105, -0.2156, -0.0672, -0.0152,  0.1100,  0.0723,
         0.0573,  0.0540, -0.0263,  0.0094, -0.1065,  0.0861, -0.0399,  0.0343,
        -0.0434, -0.0077,  0.1864,  0.1406,  0.2529, -0.0250, -0.0106,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1917e+00, -1.4189e+01,  6.5349e-02, -1.6434e-01, -6.7226e-02,
        -1.1235e-01, -4.2771e-01, -1.0196e-02, -1.1595e+00, -3.4046e-01,
         1.8156e-01, -3.2800e-01,  4.1793e-01, -5.7263e-02,  6.7027e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3455e-01,  1.0364e+01,  4.9479e-01,  5.0071e-03,  4.3960e-02,
         2.0013e-01,  1.6222e-01,  2.4675e-02,  1.6217e-02,  5.0102e-03,
         7.8854e-02, -8.1217e-02,  9.9830e-03,  2.0179e-01, -4.7200e-01,
         1.7678e-01,  5.7983e-02, -1.4534e-02,  1.0473e-02, -1.8654e-01,
        -3.5043e-02, -1.0292e-01,  6.5936e-02,  1.2099e-01, -1.2560e-01,
        -6.6409e-02,  4.1181e-01,  2.1920e-01, -1.5428e-01,  9.1549e-02,
         1.7736e-02,  4.1678e-02,  1.4740e-01,  4.7841e-02, -1.4164e-03,
         1.0954e-01, -7.2117e-02, -4.9328e-02, -6.2789e-02,  6.2209e-02,
        -3.7372e-03, -9.8895e-03, -1.9900e-02, -8.5650e-04, -4.2649e-02,
        -1.7238e-01, -6.7388e-01, -3.6500e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 8.0907e+00, -3.0911e+01, -3.8917e+00, -4.9038e-01,  7.1653e-01,
         1.2015e+00, -7.0046e-01, -4.6393e-02,  1.6794e-01, -3.2506e-01,
         6.5218e-01, -4.6740e-01, -3.4091e-01, -2.3764e-01, -6.0450e-03,
        -7.0954e-01,  8.5594e-01, -5.3138e-01, -1.5885e-01,  4.2623e-01,
         1.2820e-01,  4.7250e-01,  5.2748e-01, -7.7759e-01, -4.0170e+00,
        -2.1304e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3016e+00, -1.7623e+01,  7.6557e-01, -1.1116e+00, -1.0676e+00,
        -7.7515e-02,  6.5615e-01, -2.4929e-01, -1.7014e-01, -2.5698e-01,
        -3.9052e-01,  2.2757e-01,  2.3354e-02,  1.9856e-01,  4.8548e-01,
         2.4983e-01,  9.5098e-03, -5.3902e-02,  3.7870e-01, -3.4377e-01,
         2.5688e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2043e+00, -2.7877e+01,  5.5184e-01, -4.8477e-01, -1.4809e-01,
         1.2544e+00,  4.5269e-02, -1.2912e-01,  3.2147e-01,  1.9501e-01,
        -3.1654e-01,  2.9868e-01,  1.3109e-01,  1.6608e-01,  6.7737e-01,
        -4.2914e-01, -1.3648e-01,  1.3017e+00, -5.4906e-01,  7.2643e-01,
         1.2776e+00,  3.2320e-01, -1.1200e+00,  9.3228e-02,  2.6276e-01,
         3.9188e-01,  2.8101e-01,  2.7879e-01, -2.1717e-01,  7.7044e-02,
         1.3935e+00,  2.4347e-01, -7.7536e-03,  9.1260e-02,  9.3613e-01,
         1.3329e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9441e-01,  2.5700e+01,  1.5469e-01, -1.1711e+00,  9.8244e-01,
         7.8268e-01,  1.3068e+00,  4.3151e-02,  5.7492e-02,  4.5977e-01,
        -3.0193e-01, -2.1708e-01, -2.8483e-01, -3.1892e-01,  3.0993e-01,
         3.9178e-01, -2.3101e-02, -3.9598e-02, -2.1566e+00, -4.1430e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7695, 49.3313,  1.2944,  1.4811,  2.0065,  1.1671,  0.1185,  0.5045,
         3.8483,  0.2880,  2.0530, -0.3576, -0.3477, -0.1820, -0.1030, -0.4287,
        -0.2510, -0.9463, -1.3294, -0.1186, -1.6587,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8256e+00,  3.6592e+01,  1.7582e+00, -4.4660e-01, -6.9587e-01,
         1.0517e+00,  6.7419e-01, -1.0300e+00,  2.1235e-01,  6.6913e-02,
        -7.3798e-01,  7.2007e-02,  1.4198e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3182e+00, -1.6745e+01, -8.2899e-01, -1.8720e-01,  1.8461e-01,
         2.1055e-01,  1.8317e-01, -3.1948e-03,  3.6495e-02, -1.9958e-01,
        -3.6305e-01,  2.9016e-01, -7.8773e-02,  5.5056e-02, -3.6634e-01,
         6.8120e-02, -2.9552e-01, -2.5874e-01, -1.7156e-02, -1.5130e-01,
         4.5430e-02, -1.3544e-01, -2.4734e-01, -2.2641e-01, -1.0228e-01,
        -7.7948e-02,  5.4957e-01,  6.2736e-02, -3.3069e-02, -4.4125e-01,
         1.6102e-01, -6.6547e-02, -3.3442e-01, -2.9952e-02, -6.5577e-02,
        -4.4744e-01, -9.3427e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9862e+00, -2.8552e+01,  1.4137e+00, -8.1755e-02,  7.5466e-02,
         7.4570e-02,  1.7621e-01,  3.9007e-01,  7.3003e-02,  3.3668e-01,
         4.1813e-01, -5.6445e-02,  1.1743e-01, -8.1964e-02,  5.4364e-01,
         4.0340e-02,  2.1538e-02, -1.1137e-02, -5.7564e-02,  9.6559e-02,
         1.1822e-01, -2.2163e-01, -2.4398e+00, -1.9217e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5150e+00,  1.2166e+01, -8.2414e-01,  8.2984e-02, -4.2368e-02,
        -3.2308e-02, -1.7139e-01,  2.3346e-01, -1.5721e-01, -4.8207e-01,
        -1.1556e-01, -3.5617e-01,  6.4571e-02,  2.6410e-01,  3.0806e-01,
        -3.5473e-01, -3.4656e-01, -2.7913e-01,  9.9293e-02, -5.2857e-03,
        -4.1897e-02,  1.3575e-01, -1.2327e-01, -2.0384e-01, -1.2465e-01,
        -1.4049e-02,  6.4374e-02,  4.8255e-02, -7.3303e-02,  2.2749e-02,
         5.6669e-01, -7.4107e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7931e+00,  3.9941e+01, -5.0400e-01, -2.1519e-01,  1.2416e+00,
         7.1581e-02, -4.9272e-01, -1.5148e+00,  7.0382e-01, -9.4542e-01,
         2.0467e-02, -6.7617e-02, -5.0784e-01, -2.5830e-01,  9.2042e-02,
         1.3421e-01, -1.0888e+00, -2.7828e-01,  8.4745e-02,  8.0372e-01,
        -1.3366e+00, -6.9765e-02,  2.5162e-01, -1.1151e-01, -1.9493e-01,
        -8.3874e-01, -3.9393e-01,  7.8899e-02, -2.6666e-01, -5.6880e-01,
        -1.0233e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0460e+00,  1.1921e+01, -7.3268e-01, -5.3764e-01, -4.5240e-01,
        -4.9748e-01, -5.1565e-01, -3.0547e-01,  3.5828e-02,  1.0854e+00,
         2.8964e-02, -5.8852e-01,  9.4146e-02, -2.1679e-01, -8.8822e-03,
         4.5336e-01, -1.4480e-01, -3.1471e-01, -2.4894e-01,  2.6372e-01,
         3.1097e-01,  1.4147e+00,  4.9041e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5887e+00,  1.7396e+01,  1.1366e+00, -2.2747e-01,  5.2743e-01,
        -2.6357e-01,  3.0880e-02, -6.5633e-01, -3.9597e-01, -2.1530e-01,
        -1.4510e-01, -8.1658e-01,  1.6642e-01,  2.2573e-03, -1.1248e-01,
         4.8858e-02,  4.9464e-01,  3.6584e-02, -1.5916e-01, -1.0172e-01,
        -2.9107e-01,  1.7958e-02, -1.5034e-02,  1.1277e-01, -8.3241e-02,
         1.4132e-01,  1.7895e+00,  2.5621e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.6961e-01, -2.7729e+01, -2.5569e-01,  1.0942e-01,  6.0594e-01,
        -1.8263e-01, -1.1405e-01,  5.0903e-01,  5.9144e-01,  2.9051e-01,
        -2.2889e-01, -2.1653e-02,  5.1417e-02, -5.1972e-02,  1.9796e-01,
        -8.6650e-02,  2.3773e-01,  1.7472e-01,  3.1896e-01, -1.4951e-01,
        -1.3862e-02, -4.9937e-02,  3.9551e-01,  2.3657e-01, -1.7092e-01,
        -3.6798e-02,  1.4467e-01, -6.8773e-02, -3.3467e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9540e-01, -1.7128e+01, -5.6222e-01,  1.8904e-01, -1.3416e+00,
         2.9422e+00, -1.7571e-01,  7.8162e-01,  4.9409e-01, -2.4552e-03,
         2.5928e-01, -3.8050e-01,  5.2761e-01,  1.1135e-01,  3.1100e-01,
        -1.3703e-02,  9.3640e-02,  2.9719e-01, -2.2255e-01, -8.2593e-02,
        -6.0864e-03,  1.1895e+00,  5.9912e-01, -8.6083e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0421e+00,  1.5080e+01,  2.7949e+00,  1.2627e+00,  5.9534e-01,
        -2.9508e-01,  4.6418e-02, -3.3832e-01,  1.6235e-01, -1.9901e-01,
        -2.1332e-01,  5.3603e-01,  1.7399e-01,  1.2734e-01, -1.2440e-01,
        -1.5760e-01, -1.5324e-01, -1.2264e-01, -4.5507e-01, -1.4943e-01,
         5.7790e-01, -2.4859e-03,  1.0362e-01,  2.5306e-01, -7.5222e-02,
        -1.8945e-01, -4.3639e-02,  7.6282e-03, -1.6796e-01,  2.8863e-02,
        -1.8806e-01, -1.0552e-01, -5.8077e-02, -2.2257e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1696e-01, -1.3489e+01, -1.0396e+00,  4.7772e-01, -4.7400e-01,
         1.0666e-01, -3.4559e-01,  8.0784e-02, -3.0879e-01, -2.4617e-01,
        -1.3127e-02, -2.4384e-01, -3.5086e-02,  2.2756e-02, -8.7226e-02,
         4.5521e-03,  1.4091e-02,  3.6166e-01,  5.9092e-02,  2.5461e-01,
         1.1315e-01,  2.1767e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0742e+00,  3.9501e+01,  1.7273e+00, -1.7072e+00,  3.2291e+00,
         4.9003e-01, -7.9324e-01,  2.7560e-01, -1.1554e-02, -4.6181e-01,
        -7.4178e-01,  8.1259e-03, -5.0413e-02, -4.0761e-01,  1.2159e+00,
         7.5814e-01,  3.7493e+00,  1.6548e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2223e+00,  2.4969e+01, -2.0025e+00, -1.8451e-01, -3.3218e-01,
        -1.3580e+00, -5.1769e-02,  5.4681e-01, -1.5847e-01,  1.0175e-01,
         1.6453e-01,  2.9821e-02, -2.6512e-02,  9.9707e-02, -3.0602e-01,
         2.5522e-03, -2.8486e-01, -4.7289e-01, -2.0906e-01, -5.1033e-02,
        -9.8123e-02, -2.6826e+00, -1.4159e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2487e-01, -8.4254e+00,  1.9580e-01, -3.1143e-01,  5.3511e-02,
        -5.1849e-02,  2.4230e-02,  7.6439e-02, -1.7123e-01,  2.3749e-02,
         5.0941e-02, -8.9736e-03,  3.9018e-02, -1.8534e-02, -1.1650e-01,
        -6.5302e-03, -3.8432e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0758e+00, -6.0355e+01, -1.8337e+00, -5.8656e-01, -1.8505e+00,
        -3.4053e+00,  1.7234e+00,  2.4326e+00,  1.3808e+00,  9.9734e-01,
        -1.5349e+00,  4.3758e-01,  5.2108e-01, -1.6096e+00,  2.5810e-01,
         6.5255e-01, -9.6497e-01,  7.1223e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6680e-01, -1.7909e+01, -5.4939e-01, -4.2318e-02, -2.7309e-01,
         4.2531e-01,  2.4612e-01, -4.1798e-01,  1.6362e-01, -9.7066e-01,
         7.1910e-01,  6.1495e-01, -6.8537e-02, -8.3834e-02, -2.1774e-01,
        -6.4808e-02, -2.1595e-01,  1.5332e-01, -1.1936e-01, -8.4083e-02,
         2.5413e-03,  2.5864e-01,  7.0944e-02, -2.9744e-01,  9.8772e-01,
         1.8650e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3290,  8.9580,  0.9687, -0.2222,  0.1667, -0.3544,  0.7853,  0.1502,
        -0.2013, -0.1687, -0.5214,  0.0507,  0.0305,  0.0161, -0.0247,  0.1811,
         0.0570,  0.1442,  0.0721, -0.0126, -0.1103,  0.0939,  0.0939, -0.0763,
         0.0219,  0.1308, -0.1784, -0.0195, -0.1109, -0.9684,  0.2471,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7018e+00,  2.8633e+01,  2.2490e+00,  9.9259e-01,  4.6840e-01,
         7.2338e-01,  1.5268e+00, -5.0154e-01,  4.7407e-03, -9.2031e-01,
        -2.3741e-01, -4.3142e-01,  4.9178e-01, -1.9154e-01, -7.3555e-01,
        -3.3112e-01,  4.4607e+00,  1.4851e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -5.2120, -27.8185,   2.9683,   3.2795,   0.0468,   0.5681,  -1.8502,
          2.6507,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.9599e+00,  9.6531e+00,  3.4447e-01, -2.7819e-01, -1.5978e-02,
         3.3786e-01, -4.5099e-01, -1.1483e-01, -3.8554e-03,  1.3549e-01,
        -8.7313e-03, -3.5425e-01,  1.5449e-01,  1.2272e-01,  1.1052e-01,
         2.2350e-01,  9.1449e-02, -4.0889e-02,  5.1513e-02,  1.1180e-01,
         8.6461e-02,  1.1454e-01, -8.1825e-02, -8.9552e-02,  2.2685e-01,
        -5.9049e-02, -5.5597e-02,  9.7379e-02,  2.0725e-02, -1.0082e-01,
        -2.7091e-01, -4.2876e-02, -5.0653e-02, -6.5613e-02, -1.0106e-01,
         1.3199e-01,  4.5559e-02, -2.4125e-02,  2.5545e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0312e+00,  2.1608e+01, -1.0763e+00,  2.6688e-01,  1.7728e+00,
        -2.1239e-02, -3.7372e-01, -5.0738e-01, -6.3520e-01,  4.3055e-01,
        -2.4575e-01,  2.6681e-01,  8.5438e-02, -3.8735e-02,  3.2656e-01,
        -1.0381e-01, -8.3431e-02, -3.8950e-01, -1.1772e+00, -2.7267e-01,
        -3.1282e-01,  1.3805e-01,  1.6070e-01,  1.5210e-01, -8.3340e-02,
         5.6315e-02, -1.8292e+00, -1.1603e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.2969, -11.9534,   0.1390,  -0.2761,  -0.1502,   0.1656,   0.1347,
          0.3841,  -0.1482,  -0.2065,  -0.5021,  -0.2435,   0.0915,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5122, -46.1071,   1.3610,   0.6552,  -1.8939,   1.0563,   0.2413,
         -0.8219,  -0.0635,   0.7997,   0.7412,   0.2619,  -0.2103,   0.5446,
          1.5210,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0256e-01,  7.9371e+00, -4.7887e-01, -9.1102e-02,  2.6708e-01,
        -1.9929e-02, -1.6232e-01, -9.0547e-02,  1.1824e-01, -7.7459e-02,
        -3.0274e-01, -1.2199e-01, -3.6658e-02,  6.3063e-02,  2.4019e-01,
        -1.4359e-01, -7.4466e-03,  1.7610e-02, -5.8236e-02, -2.6652e-02,
        -1.1425e-01,  3.5936e-02,  8.4533e-02,  1.0820e-02,  1.2868e-01,
         2.1713e-01,  1.2547e-02,  8.3780e-02, -3.0065e-02, -3.2777e-02,
        -2.1991e-02, -6.6262e-02, -5.7601e-02, -1.1519e-01, -7.3991e-02,
        -7.9151e-02, -2.3027e-01, -3.5726e-02, -2.9273e-03, -1.2708e-02,
        -2.8685e-02, -3.7316e-02, -4.3872e-02,  8.3168e-02, -2.6964e-02,
         1.6563e-02, -2.9749e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1806e+00, -2.9100e+01, -1.2355e+00,  3.9977e-02,  5.2702e-01,
         1.0151e+00,  1.7140e-01,  3.3476e-01, -4.6353e-01,  2.5843e-01,
        -3.1077e-01, -9.7932e-01, -3.3379e-01, -3.6120e-02, -4.1623e-02,
         5.4299e-01, -2.0878e-01,  1.3485e-01,  1.7143e-01, -9.2801e-02,
        -3.1448e-01,  4.0021e-01, -1.7610e-01, -4.6988e-01,  3.1714e-02,
         4.1349e-02,  2.3022e-01,  2.0452e-01,  1.4652e-01, -2.3751e-01,
         3.4129e-01,  8.2792e-02, -5.4084e-01, -5.3914e-01, -2.1291e-01,
         1.8441e-02,  4.4848e-01,  1.1614e-01,  1.9507e-01, -3.2566e-01,
        -3.0372e+00,  1.4645e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3249e-01,  3.6112e+01,  3.4103e+00, -6.3805e-01,  9.9346e-02,
         5.7684e-01,  8.1798e-01, -3.3687e-01,  8.7892e-01,  4.5834e-01,
         5.8605e-01, -6.9002e-01, -1.8573e-01,  9.5070e-03,  6.6982e-01,
         2.5534e-01,  1.0702e+00,  4.4469e-02, -1.7175e-01,  4.5964e-01,
        -5.1176e-01, -4.7615e-01, -4.3668e-01,  1.0396e-01, -8.8763e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5949e+00,  2.6885e+01, -2.7017e-01, -8.1638e-01,  4.2304e-02,
        -5.0680e-01,  1.5930e-01,  5.3280e-02,  3.9890e-02,  1.3167e-01,
         2.7052e-01,  2.3308e-01,  3.5423e-01, -3.4544e-01, -2.7180e-01,
        -6.8552e-01,  1.9201e-02,  3.3953e-01, -1.2126e-01, -4.4660e-01,
         1.2484e+00, -1.2919e+00, -1.0180e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2307e-01, -2.4479e+01, -2.7347e+00, -3.3379e-01,  5.8029e-01,
        -3.2200e-01,  1.9087e-01,  5.9363e-01,  1.1317e-01,  6.6476e-01,
        -3.2949e-01, -1.1614e-01, -2.5167e-02, -4.2321e-01,  6.6527e-02,
         2.0397e-01,  3.3905e-01, -2.3172e-01, -5.1554e-02, -1.1629e-02,
         1.9382e-01,  2.0342e-01,  3.2839e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3476, -4.4264, -0.6600, -0.0083,  0.0785,  0.1316,  0.0236,  0.2899,
        -0.3394, -0.0951,  0.1518, -0.3834,  0.2909,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8481, -7.0653,  0.6063,  0.3383, -0.1150,  0.1499, -0.0096,  0.1139,
        -0.3554, -0.3468,  0.3248, -0.1068, -0.0077,  0.0775,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0804e-01, -1.4132e+01, -4.9284e-01,  1.4621e-01, -8.8040e-01,
         1.9447e-01, -1.1509e-01,  5.1809e-01,  1.6988e-01,  4.3313e-02,
        -3.7275e-03,  5.6650e-02,  4.9171e-02,  9.1399e-02, -1.8485e-01,
        -7.3982e-02,  4.1971e-01,  2.7994e-01,  6.2837e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.9164e-01,  9.9837e+00, -4.2637e-01,  3.1635e-02, -4.3691e-01,
         2.4850e-01,  6.4825e-02, -2.6040e-01,  7.4223e-02,  2.8428e-01,
        -7.9087e-03,  2.0757e-02, -2.2363e-02, -1.9055e-01,  5.8627e-02,
        -1.1750e-01,  2.7764e-02, -5.5862e-02,  8.0750e-02,  1.7082e-01,
         1.4754e-01, -1.2399e-01, -1.2041e-01,  5.6451e-02,  4.9171e-02,
         8.7273e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.2809, -34.7275,  -2.0952,   0.1450,  -0.7262,   0.7323,   0.4170,
          0.6629,   0.4153,   1.6430,  -0.8927,   0.1883,   0.1940,  -0.7495,
          0.5503,  -2.1682,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5261e-01, -2.5727e+01, -1.5583e+00, -3.5732e+00, -6.9053e-01,
        -4.7961e-01, -1.1707e+00, -6.6935e-01,  1.1761e+00, -1.1061e-01,
        -2.1743e-02, -6.2507e-02, -6.3226e-02, -4.0998e-01, -1.2993e+00,
        -4.2670e+00, -8.9726e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3950,  6.9983, -0.1534, -0.6899,  0.4583, -0.2865, -0.1333, -0.2328,
         0.1853,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4430e-01, -9.4929e+00,  6.1953e-01,  5.9402e-01, -5.5310e-01,
        -5.6640e-01,  2.2186e-02, -8.0228e-01, -2.0608e-01, -2.0909e-01,
        -2.3073e-01,  8.9799e-03,  1.1852e-02,  1.8713e-01, -2.6427e-01,
        -3.1280e-02,  5.8931e-02, -5.1973e-02, -1.9406e-01, -3.0827e-02,
        -1.0958e-01,  2.0368e-01,  1.5370e-01, -2.2230e-01,  3.6987e-02,
         1.2656e-01,  4.9973e-01, -1.7572e-01,  4.0390e-01,  2.6843e-01,
         1.7122e-01, -9.3785e-02,  3.3665e-02,  4.9706e-02, -1.0636e-01,
        -4.3927e-01, -3.9401e-02,  2.5184e-03, -7.7605e-02,  4.6432e-02,
         1.8047e-02, -1.0818e-01,  3.3097e-01,  2.1728e-01, -9.4966e-02,
        -1.9254e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1228e+00,  3.1421e+01, -6.9633e-01, -4.9242e-01,  3.4382e-01,
        -7.8926e-01, -2.8398e-01,  7.7632e-01,  2.2803e-02,  4.2042e-02,
        -5.4646e-02,  4.5850e-01,  4.9065e-01,  2.8133e-01,  4.2905e-01,
         1.0243e+00,  2.3495e-01,  3.9111e-01, -3.2867e-01,  6.9933e-01,
        -9.7034e-01,  5.7037e-01, -4.3570e-01,  1.2477e-01,  5.8282e-01,
         1.9655e-02, -3.8942e-01,  1.8934e-01, -8.4915e-01,  1.7236e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.9594, -27.1482,   0.0797,  -1.3228,  -1.5716,  -1.6381,  -2.7948,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0443e+00, -2.2457e+01, -1.0381e-01,  9.1491e-02,  2.1349e-02,
         3.8412e-01, -5.3394e-01, -3.7399e-01, -3.9817e-01, -5.2657e-01,
        -4.7213e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7628, 11.9453,  1.2872,  1.6262, -0.1297,  0.4151,  0.2659, -0.0685,
         0.2044,  0.7693,  0.3518,  0.7223,  0.5528, -0.4783,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7656e-01, -1.8815e+01, -2.9608e-01,  1.8634e-02, -2.9676e-01,
        -5.9993e-01, -1.1860e-01, -4.2489e-01,  1.0827e+00,  4.5061e-01,
        -6.9948e-02,  5.1137e-02,  5.3998e-02,  2.3050e-01, -3.4840e-02,
         2.7958e-01, -1.1787e-01,  2.5354e-01,  4.5853e-02, -1.9831e-01,
        -2.1135e-02, -2.5555e-01,  1.1917e-01, -4.2343e-02, -1.0011e-01,
         9.5346e-02,  2.6940e-01, -1.3313e-01,  2.2907e-02,  5.5697e-02,
         4.2768e-01,  2.4941e-02,  5.6017e-01, -1.1329e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7249e-01,  9.4738e+00,  4.1304e-01,  1.4769e-01,  4.5192e-02,
         2.7449e-02, -2.9168e-02,  1.5412e-01, -2.6600e-03,  1.2620e-01,
        -1.2076e-02, -1.7168e-01, -7.6597e-02,  3.2724e-01, -1.0627e-01,
        -6.9692e-03,  1.8134e-01, -2.6905e-01, -1.4717e-02, -1.0433e-01,
         1.3937e-02,  1.2974e-02, -1.7786e-02,  1.8928e-01,  1.7600e-02,
        -9.0022e-02, -6.4994e-02, -4.0162e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6375e+00,  3.4039e+01,  8.1142e-01,  9.0852e-01,  4.8379e-01,
         2.7706e-02,  3.4592e-01,  3.7770e-01, -5.1065e-02,  3.7607e-01,
         6.1887e-02, -1.5419e-01, -4.0412e-01,  5.7544e-01, -1.5577e-01,
        -7.6494e-01,  5.6712e-01,  8.4116e-01, -6.9182e-01, -2.2790e-02,
        -4.9769e-02, -1.7097e-01,  3.9736e-01, -1.7804e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 2.1518e+00,  1.0583e+01, -1.8304e+00, -3.8360e-01, -3.7110e-01,
        -6.1072e-01, -2.7526e-01, -1.0565e-01,  1.9597e-01, -1.0143e-02,
        -4.3034e-01, -2.0884e-01, -2.5125e-02,  8.6302e-02,  8.4802e-02,
        -1.8688e-01, -2.3043e-01, -2.8186e-01,  5.7629e-02,  4.1140e-01,
        -1.6050e-01,  1.6999e-01, -1.3963e-01, -1.4853e-01,  7.4829e-02,
         2.7377e-01, -6.9478e-02, -2.2100e-02, -2.1689e-01, -5.8783e-01,
         4.2651e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5507,  9.8606, -0.3917,  0.2657,  0.1641,  0.5187,  0.2835, -0.0889,
         0.1219,  0.2789,  0.4109, -0.4386,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0086e+00,  6.1787e+01,  4.3391e+00, -3.9552e+00, -1.0268e-01,
        -1.4021e+00, -5.0538e-01,  1.7674e+00, -1.5262e+00, -3.0218e-01,
        -2.0019e-01, -5.3944e-01,  4.3295e+00, -2.4434e+00,  5.3248e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2931e+00, -2.2419e+01,  9.8116e-01, -1.7387e-01, -1.0861e-02,
         2.7430e-01, -4.3669e-01, -6.0445e-01,  4.1260e-02,  1.8582e-01,
         4.8306e-02, -2.2318e-01, -1.6024e-01,  3.4784e-01, -3.9086e-02,
        -2.8313e-01, -1.9082e-01,  1.6413e-01, -1.1400e-01,  3.4145e-01,
        -1.0937e-01, -2.0984e-01, -4.1009e-01, -1.2349e-02, -4.7950e-01,
        -2.0231e-01, -3.0961e-01,  3.0020e-02, -8.3153e-02, -1.9578e-01,
        -1.9160e-01, -1.0145e+00, -1.2386e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.6062, -16.4406,   0.6242,   1.9294,  -0.8681,  -0.2123,   0.4343,
          0.1281,   0.2649,  -0.2581,  -0.0243,  -0.2213,   0.3306,   0.8642,
          0.2040,   3.2200,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.0384, -12.2256,   2.4895,   0.3275,  -0.2832,   0.0310,  -0.9036,
          0.1547,   0.5085,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9307,  3.6713, -0.7315, -0.4886,  0.5506,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1329e+00,  5.1983e+01,  6.2925e+00, -2.7066e+00, -3.0748e-01,
        -1.6522e-01, -1.0681e+00,  9.9974e-02, -6.4100e-01, -1.8580e+00,
         4.9972e-02,  2.0893e+00, -5.2032e-02, -2.8142e+00, -2.6985e-01,
         2.0949e-03,  1.3705e-01, -2.1352e-01,  6.4787e-01,  4.8337e-01,
        -5.4372e-01, -4.1944e-01, -7.8245e-01,  5.8761e-01, -4.3375e-02,
        -1.6921e+00, -5.7325e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.0537, -48.7655,  -5.9350,   3.5269,  -1.6720,  -5.3389,  -0.2060,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2662e+00, -1.9403e+01, -1.9107e-02, -6.4234e-01, -8.5437e-01,
        -7.7797e-02, -2.8161e-01, -2.0648e-01,  1.6555e-01,  7.0087e-01,
        -1.0749e-01, -2.3648e-01, -2.1783e-01, -3.3351e-01, -2.0948e-01,
        -3.5161e-01, -7.3081e-02, -6.6634e-01, -9.0403e-01,  4.5371e-01,
         1.8011e-01, -1.0819e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9047e+00,  3.8188e+01,  2.1042e+00, -1.5922e+00, -5.4255e-01,
         1.3433e+00, -1.1432e+00,  1.1719e+00, -1.3062e-01, -5.3840e-01,
         9.6212e-01,  1.3827e-01,  4.1136e-01,  1.3088e-01, -2.2584e-01,
        -2.9616e-02, -7.5802e-01,  1.5926e+00,  3.9542e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4312e-01,  6.5871e+00,  5.1943e-01,  5.7477e-03,  9.5850e-02,
         2.2756e-01,  1.1869e-02, -1.4954e-01, -2.8983e-02, -8.4404e-02,
         4.4936e-03, -2.0268e-02, -5.4657e-02,  2.8186e-02, -1.4667e-02,
        -6.3139e-02,  1.1674e-01,  9.0871e-02,  1.0368e-02, -1.8122e-02,
         8.7724e-02,  1.1134e-01, -3.4746e-03,  1.8074e-01,  1.5681e-01,
         2.8722e-02, -4.9673e-02, -3.6425e-02,  1.0633e-01, -1.6513e-01,
         5.6287e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.1580e+00,  5.0038e+01,  9.1270e-01, -1.0624e+00,  8.2954e-03,
        -3.3705e-01, -2.3221e-02, -4.1388e-01, -2.5526e-01,  2.7149e-01,
         3.3984e-01,  2.5756e-01,  1.2369e+00,  1.3276e+00, -1.5865e-02,
         6.4954e-02,  8.9646e-01,  8.1501e-01, -2.6491e-01,  2.6937e-01,
         1.8283e-01, -4.7393e-01, -8.4464e-02,  1.4380e+00, -4.6847e-01,
        -8.1333e-02,  2.9952e-01, -2.7797e+00, -3.8604e-01,  1.1165e+00,
         8.9733e-01, -8.9676e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8370e+00,  3.5757e+01,  1.2315e+00, -3.7146e-02, -1.7515e+00,
        -6.5452e-01, -1.7626e+00, -2.9707e-01,  2.7686e-01, -6.6025e-01,
         8.7511e-02,  8.5674e-01, -5.5425e-01, -2.1795e-01, -2.3494e-02,
         2.1251e-01,  6.0026e-02, -5.9069e-01, -2.8570e-01, -3.9156e-01,
        -4.0479e-02, -4.3575e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6545e+00,  3.2324e+01, -7.1960e-01,  5.6287e-01,  4.1818e-01,
        -4.6137e-01,  1.9525e-01, -2.0805e-02,  4.0593e-01, -2.3623e-01,
        -2.8164e-02, -2.7920e-01, -3.4035e-02,  2.3639e-01, -3.0102e-01,
         1.3177e-02,  1.8390e-01,  5.8442e-01, -3.4314e-01,  5.6505e-01,
        -3.6457e-01, -5.8491e-01, -5.3163e-02, -5.1024e-01,  5.5572e-01,
        -1.9585e-01, -1.4812e-01, -3.5761e-01, -5.1562e-01, -3.5962e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2364,  5.1294, -0.2756, -0.0742, -0.0814, -0.2512,  0.0460, -0.1372,
        -0.1953, -0.1560,  0.1350,  0.0443, -0.0080,  0.1047, -0.0743,  0.0744,
         0.1877, -0.1254,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8693e+00, -4.3389e+01, -6.6115e+00, -2.3328e+00, -8.6489e-01,
         2.2408e+00,  1.1216e+00, -3.5049e-01,  1.4621e+00,  2.3429e-01,
         6.9699e-01,  3.0044e-01,  6.7167e-01, -7.2393e-02,  1.6509e-01,
         9.4204e-01, -1.3337e-02, -1.8029e-01, -9.6085e-02,  8.2325e-01,
         1.7957e-01,  6.1983e-01, -5.7373e-02, -3.0772e-01,  6.2069e-02,
         2.6650e-01, -7.4021e-01,  4.0759e-02,  4.5665e-02, -1.0503e-01,
         1.3234e-01, -3.4869e-01, -1.8680e-01, -1.8245e-01, -1.1935e-01,
         3.1144e+00,  6.9469e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3770e+00, -3.2488e+01,  4.8412e+00, -3.2488e+00, -3.3329e-01,
        -1.2426e-01,  3.5932e-01, -8.9215e-02,  1.4606e-01, -9.9453e-02,
         1.4072e-02, -3.8474e-01, -9.6328e-02,  4.0558e-01, -1.6262e-01,
        -2.0386e-01, -1.1893e-02,  4.2397e-02,  4.1597e-01, -8.1223e-01,
         1.9229e-01,  6.9134e-01,  4.7462e-01, -1.9639e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1759e+00,  1.4728e+01,  8.0770e-01,  7.7873e-02,  3.6633e-01,
        -4.4168e-02, -4.2595e-01,  1.7002e-01, -1.5206e-01,  2.1259e-01,
        -1.3154e-01,  3.8457e-02, -2.6150e-01,  1.6184e-01,  4.7697e-02,
         6.1897e-03, -9.0076e-02, -1.0572e-03,  5.0159e-02,  9.9286e-02,
         7.8350e-02,  7.6439e-02,  1.4598e-01, -7.9023e-02,  2.5303e-01,
         2.1791e-01,  8.3358e-02,  2.9805e-01,  4.0610e-02, -5.8041e-02,
        -1.3508e-01,  3.3136e-01,  1.2451e-01,  8.4363e-02, -1.8032e-02,
        -2.7411e-01, -1.1449e-01, -8.7971e-02,  3.2603e-02, -6.2522e-02,
         3.6867e-01, -3.4057e-02,  5.8850e-02, -1.4660e-02, -4.5414e-02,
         1.9899e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2325e-01,  1.5218e+01,  2.0196e+00,  2.7102e-01, -1.3511e+00,
         4.9169e-01, -4.4899e-01, -2.4556e-01,  1.6632e-01, -2.4826e-01,
         3.9043e-01, -3.0093e-01, -1.1395e-01, -1.4659e-01,  1.1839e-01,
        -5.2287e-02, -1.8471e-01, -9.0124e-01, -7.0643e-01, -2.7707e-01,
         2.5776e-03,  1.4694e-01,  1.3686e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.2520, -21.4590,   0.3834,  -0.8007,   0.6991,  -0.6817,   0.4396,
          0.4880,   0.2315,   1.0700,   0.5980,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9696, 10.8731,  0.6799, -0.6419,  0.2822, -0.3112,  0.0648,  0.3815,
        -0.0576,  0.1727,  0.3463,  0.1289, -0.1832,  0.0427,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1623, 11.8336, -0.2131, -0.0621, -0.0711, -0.1467, -0.8398, -0.5706,
         0.2880, -0.5895,  0.2643, -0.0800,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3590e+00,  4.4591e+01,  3.0323e+00,  3.0193e+00,  1.7568e-01,
        -3.6763e-01, -1.6281e-01,  7.4223e-01, -7.5358e-01,  3.5909e-02,
        -4.7223e-01, -6.5659e-01, -5.3329e-01, -1.2055e-01,  3.8495e-04,
        -6.0592e-02, -1.4590e-01, -3.3054e-01, -3.0447e-01, -6.6703e-01,
        -2.6740e-01,  1.2099e-01,  7.1146e-02, -7.6943e-02,  2.8792e-01,
         2.8945e-01,  6.3910e-01, -1.1835e-01, -3.0792e-01, -5.2958e-01,
        -1.7468e-01,  3.1410e-01, -3.2347e-01,  6.3124e-01, -1.2010e-01,
        -1.1764e-01, -1.0915e-02, -1.6083e-01, -2.3671e-01, -8.7670e-02,
         1.2624e-01,  1.1043e-01,  4.4329e-01, -1.5191e-01, -3.1690e-01,
        -2.4164e-01, -2.3440e-01, -7.5629e-02, -2.2414e-01, -4.7455e-01,
         3.5944e-01, -1.3795e-01, -9.3598e-02, -2.0108e-01, -6.3034e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 6.4128e-01,  4.2693e+01,  1.2406e+00,  9.3879e-01,  2.1942e-01,
         1.6371e+00, -2.4024e-01, -3.4151e-01, -3.1309e-01,  3.4034e-01,
        -3.1776e-01,  4.2360e-01,  8.8716e-01,  6.9015e-02, -2.0268e-01,
        -2.1839e-01, -1.4321e+00,  4.1292e-01,  8.6207e-03,  9.9477e-01,
        -1.2196e+00,  1.1632e+00, -1.5269e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5741e-02, -1.0154e+01, -1.2300e+00, -4.9669e-01, -1.6886e-01,
        -1.3995e-01,  4.2040e-02, -6.8816e-02,  3.3702e-01, -2.8586e-01,
        -1.4833e-01, -1.9319e-02, -1.1399e-01,  2.3922e-01,  5.7161e-03,
        -1.7161e-03,  5.4288e-01, -1.5548e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7505, -9.8440, -1.5554,  0.2820,  0.3890, -0.5008, -0.6813,  0.4045,
         0.2644,  0.2916,  0.1812, -0.1096,  0.1627,  0.3339,  0.3183, -0.2625,
        -0.0747,  0.0411,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0608e+00, -2.5393e+01, -3.8146e-01, -1.8498e-01,  1.1166e+00,
        -6.3321e-01,  9.8030e-01,  4.5833e-01,  3.0289e-01,  3.7893e-01,
        -1.0639e-02, -1.6356e-02, -4.4910e-01, -6.3087e-02,  3.6202e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0227e-01,  4.6587e+01,  1.4445e+00,  5.6102e+00,  1.5968e+00,
        -1.2483e-01, -4.1479e-01, -8.6881e-01, -8.0450e-01,  4.5721e-01,
         4.2425e-02, -1.4851e-01,  1.0172e+00,  5.8537e-01,  5.3069e-01,
         9.2693e-01,  1.1558e+00, -1.1848e-01, -1.6406e-01,  5.8364e-01,
         6.5095e-01,  4.7416e-02,  1.0979e-01, -2.3053e-01, -1.0778e+00,
         1.3446e+00,  2.5973e+00, -2.4196e-01, -2.3778e-01,  4.6173e-01,
         3.8636e-02, -1.6058e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2653e-01, -2.4482e+01, -6.8316e-01, -2.6710e-01, -6.2079e-01,
        -3.7155e-01,  2.9807e-02, -2.2465e-01, -7.1950e-02, -1.7410e-01,
        -3.8331e-01,  1.3122e-01,  8.3970e-02,  3.8325e-02,  9.3908e-02,
        -1.9517e-01,  4.2571e-02,  1.0748e-02,  4.2880e-02,  2.2359e-01,
         5.2534e-01,  4.2560e-01,  8.8243e-01, -1.4280e-01,  1.2935e-01,
         1.4961e-01,  1.1146e-01, -2.6777e-01, -1.0160e-01,  8.2153e-02,
         3.9639e-01, -5.9912e-02, -1.2366e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4741e+00, -7.4439e+00, -1.4052e-01,  5.1599e-02, -2.8020e-01,
        -2.7874e-01,  3.1194e-02,  1.2991e-01,  8.7131e-02,  3.6593e-01,
        -5.2246e-02,  6.2925e-02, -2.7606e-01, -2.7930e-01, -4.0244e-01,
         9.3119e-02,  1.8845e-02,  2.0759e-02, -6.0134e-03,  3.2569e-02,
         5.5813e-02,  6.3371e-02,  4.7505e-02,  2.8402e-01, -2.5304e-02,
        -2.2085e-02, -8.9630e-02, -2.1105e-02, -5.9799e-02,  1.5898e-03,
         1.9021e-02,  7.1016e-02, -2.0109e-01, -1.2513e-01, -9.5660e-02,
         1.1563e-01, -4.5994e-02,  3.3665e-02,  2.3045e-02,  2.1923e-02,
        -7.1413e-02, -2.0885e-02, -8.4379e-03, -6.9395e-02, -2.7994e-02,
        -8.4908e-02, -9.9195e-03, -1.4829e-02, -5.8476e-02, -5.0474e-02,
        -1.6891e-02, -3.4913e-02, -2.4953e-02, -2.5132e-02, -1.3917e-02,
        -6.3796e-02, -3.1108e-01,  1.4962e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5356, 33.0849,  3.8620, -4.4724,  1.1295,  0.3489,  0.4976, -0.1647,
        -0.2235,  1.2062,  2.1129, -1.2450, -1.0756,  2.5834,  4.1327, -0.1347,
        -0.2349,  0.7837,  0.4975,  1.2714, -2.0865, -0.2158, -1.5498, -2.0907,
         3.3696,  3.6298,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7508e-01,  2.0006e+01, -9.5733e+00, -5.9885e-01, -1.9739e-01,
        -6.6820e-02,  9.2861e-01,  3.3946e-02,  1.0898e+00,  1.2824e-01,
        -2.6884e-01,  2.7885e-01,  4.5940e-02,  1.4217e-01,  2.9196e-01,
        -6.1234e-01,  4.4105e+00, -8.6900e-02,  3.7801e-01,  1.4445e-02,
         3.1970e-01, -1.5906e-01, -2.8062e-01, -1.7771e-01, -1.0967e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9708e-01,  4.3716e+00,  9.6634e-01, -1.6363e-01, -1.8246e-01,
         2.2277e-01,  1.0322e-01, -2.9433e-01, -1.7739e-01, -3.4149e-01,
        -2.0716e-01,  4.1865e-02, -2.0546e-02,  2.3380e-02, -1.4545e-02,
         1.4383e-01, -8.8909e-02,  2.8378e-02, -1.1932e-02, -6.0390e-02,
         2.4822e-02,  3.1532e-02,  2.1931e-02,  2.3758e-02, -1.3468e-01,
        -6.3160e-02,  9.0544e-04,  1.4978e-01, -1.4347e-02, -1.8107e-02,
         2.8475e-02,  1.8856e-02,  2.1920e-02,  1.9006e-05,  4.8329e-02,
         1.7579e-02, -5.2990e-02, -2.4691e-02,  4.5101e-02,  2.3798e-03,
        -4.6659e-02, -3.9639e-02, -6.0839e-02, -1.0104e-01, -1.6023e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2316e+00,  2.2162e+01,  7.8909e-01,  3.7573e-01, -1.8328e+00,
         1.3001e-01, -3.9955e-01,  4.3843e-01,  5.7457e-01, -1.1059e-01,
        -3.3051e-01, -3.8468e-01, -1.0290e+00, -6.9060e-03,  7.4976e-02,
         6.6475e-01, -6.7062e-01,  1.9753e-01,  1.4066e-01,  5.0729e-02,
         2.5818e-01, -1.8769e+00,  9.1216e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3371, 24.7497, -0.9447,  0.5932,  1.3890,  0.7039, -0.6918,  0.1353,
        -0.2130, -1.4468,  0.1676,  1.2381,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-4.2951, 45.8964,  2.6094,  1.1319, -0.3324,  0.3399,  3.0176,  0.6584,
         0.7288, -1.2880, -0.1230, -0.2059,  0.1689, -0.4530, -0.5410, -0.5983,
        -0.3824, -0.0713, -0.1891,  0.1383, -0.4813,  0.5332, -0.9352,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0604, 22.5650,  0.4897, -0.1375, -0.3405, -0.2021, -0.6593,  0.8238,
         0.6742, -0.1926, -0.0644, -0.0867,  0.4768,  0.4667, -0.2418, -0.2208,
        -0.6359, -0.5972,  0.4612, -0.2178,  0.4451, -0.1195,  0.0306, -0.2895,
         0.0633,  0.0716,  0.2283,  0.0486, -0.1935,  0.1618,  0.3124,  2.1623,
         0.9138], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2466, 19.7584,  1.8535,  0.0359, -0.0589,  0.4964, -0.2348, -0.4185,
        -0.1716,  0.4573,  0.1218, -0.0277,  0.2567, -0.1832,  0.4192, -0.3807,
        -0.3235,  0.3101,  0.1029,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.9170, -16.6852,   0.5742,  -0.8024,   0.4935,  -0.5243,  -0.4350,
          0.5703,  -0.1086,  -0.0391,  -0.1528,   0.2154,   0.4590,   0.2650,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3213e+00, -1.2627e+01, -5.6514e-01, -2.6211e-01, -1.1801e+00,
        -3.1846e-01, -5.2342e-02,  5.8034e-02, -3.0357e-01, -1.2133e-01,
        -1.4079e-01, -7.8249e-02, -3.0665e-01,  1.1025e-02, -6.3416e-02,
        -1.5856e-01,  5.6504e-02, -1.4542e-01, -1.9084e-01,  1.1849e-01,
         8.3459e-01, -6.2854e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3944e-01,  8.7148e+00,  2.2010e-01,  3.1507e-01, -1.1158e-01,
         9.7613e-02, -5.2906e-02, -2.3497e-01,  1.3431e-01, -1.2559e-02,
         1.7825e-01,  1.8317e-01,  9.8770e-03, -1.0489e-02,  1.4501e-03,
        -2.4391e-02,  3.3007e-01,  1.1585e-01, -8.9508e-02,  7.4642e-02,
         4.9128e-02,  6.1335e-02, -5.8347e-03, -1.0967e-01,  1.9369e-02,
         3.3812e-02, -2.7145e-02,  1.2731e-01,  2.2126e-01,  5.9358e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9814e+01,  2.8755e+01, -9.8173e-02, -2.1363e-02,  2.9478e-01,
         1.1426e+00, -2.1636e-01, -2.1687e+00,  3.0284e-01, -5.6174e-01,
         8.2767e-01,  2.5976e+00,  9.4298e-01,  9.1753e-01,  5.4315e-01,
         7.9021e+00,  7.1652e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2371,  7.3088,  1.2993,  0.4012, -0.4139,  0.0557, -0.4422,  0.7296,
        -0.9864,  0.5647,  0.1457,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3050, 70.4582,  2.2859, -1.1903,  2.2651, -2.8361,  0.8030, -0.4861,
        -0.6647, -0.3217, -0.8422, -1.6246,  0.2585,  2.3978, -1.3271, -4.4045,
         2.4777,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.3319, -37.8473,  -1.9604,  -0.4965,  -1.2648,   0.4828,   0.6361,
          0.0677,  -0.6277,   0.4149,   0.2374,  -0.3673,   0.6735,   0.7604,
          0.2284,  -0.5338,  -0.0627,   0.1600,  -0.0383,  -0.1570,  -0.0469,
         -0.3448,   0.2672,  -0.0588,  -1.5519,   2.4950,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0292e+00,  3.0849e+01,  3.7879e+00, -6.1845e-01,  1.0166e+00,
         1.0932e+00, -7.7377e-01,  1.2410e-01, -1.0711e+00, -2.3414e-01,
         3.9585e-01, -1.1233e-01, -6.7043e-01, -5.4480e-01, -6.8695e-01,
        -1.1360e-01, -6.5835e-01,  1.3626e-03, -1.6512e-01, -5.4841e-01,
         3.4121e-01,  5.2407e-03,  9.1287e-02,  1.3986e-01,  5.2992e-01,
        -2.1389e-01, -7.3735e-02,  5.1707e-01, -1.5199e+00, -2.9439e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.4764, -10.8850,   0.1774,   0.4994,  -0.3821,  -0.0542,  -0.4593,
         -0.1028,   0.0273,  -0.0869,   0.2610,   0.5661,   0.0243,  -0.0409,
          0.0585,  -0.0987,   0.0115,   0.1329,   0.3577,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 7.8430e-01,  3.9090e+01,  5.1953e+00,  1.3620e+00,  4.8119e-01,
        -1.9076e-01,  9.7059e-02, -1.3573e-01,  4.8346e-01, -1.5344e-02,
        -3.6797e-01, -4.0328e-01, -1.7993e-01,  4.9848e-02, -4.5529e-01,
        -2.0909e+00,  2.2883e-01, -1.2765e-01,  1.4085e-01,  2.4732e-01,
        -1.6958e-01, -8.8496e-01,  1.6875e-01,  8.9963e-02, -1.4764e-01,
        -1.2026e+00, -6.4494e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5271, -5.9316, -1.2233, -1.6453, -1.0125, -0.1242, -0.0437, -0.6098,
        -0.2281, -0.6744,  1.7074, -1.4438, -3.5482,  0.7654,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3124e-01,  3.6323e+01,  2.2118e+00, -4.4432e-01, -1.9082e-01,
        -8.5450e-01,  7.0578e-01,  4.9199e-01, -3.8012e-01,  8.1737e-02,
         1.6641e-02,  5.1936e-01, -1.3013e-01,  8.5739e-01, -9.1328e-02,
        -6.4281e-02, -1.8749e-02, -1.2910e+00, -4.0618e-02, -1.3583e-02,
         1.1948e-01, -1.4205e-01,  6.7000e-01, -9.9920e-02, -6.4509e-01,
        -5.6626e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5289, 12.2932, -0.2029,  0.1712, -0.4956, -0.3614, -0.1481,  0.1507,
        -0.2843, -0.3117,  0.0627, -0.4747,  0.6689, -0.4526,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8415e+00,  1.3876e+01, -9.4994e-02,  3.0158e-01, -6.0729e-02,
        -1.3137e-02, -7.0939e-01, -2.6172e-01, -1.2008e-01, -2.1904e-01,
        -2.3985e-01, -3.9899e-01,  9.5676e-02, -1.3692e-02,  3.2200e-01,
        -9.8656e-02, -1.3520e-01, -5.5642e-02, -9.5815e-02,  1.0092e-01,
        -3.9078e-02, -6.9864e-02, -2.5136e-01, -4.9829e-01, -7.1526e-02,
        -1.1456e-01,  6.4730e-02, -1.3974e-01,  5.2129e-02,  1.3886e-01,
        -1.1450e+00,  3.2929e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1091,  1.0178,  0.0856,  0.4733,  0.3313,  0.3219,  0.4005,  0.1668,
         0.2135,  0.0389,  0.2259, -0.2907,  0.1012,  0.2846, -0.3011,  0.3703,
         0.1694,  0.1647,  0.0541,  0.0035,  0.4566,  0.0647, -0.0253,  0.6355,
        -2.2222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2599,  5.2643,  0.5799, -0.8711,  0.3840, -0.1897,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5992e+00, -3.3461e+01, -1.7668e+00,  2.0106e+00, -1.4462e+00,
         1.7107e-01,  4.6616e-01,  2.3267e-02, -4.2776e-02,  1.8006e-01,
         3.9849e-02, -3.9819e-01,  2.3174e-01,  1.4289e-01,  7.8088e-01,
         4.4221e-01,  1.7433e-01, -9.6593e-02,  3.3069e-01, -5.1075e-02,
         1.0045e+00,  9.2103e-02,  2.7025e-02,  8.0759e-02,  7.9718e-02,
        -3.8977e-01,  1.2092e-01,  1.3362e-01,  9.7508e-02, -1.9841e-01,
        -1.2770e-01, -1.4484e-02, -1.4086e-01, -1.5414e-02,  4.1483e-02,
         1.6624e-01, -2.2440e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5049e+00,  4.1216e+01,  1.3658e+00,  1.2459e+00,  3.0739e-01,
        -1.8864e-01,  4.8055e-01, -5.7155e-01,  5.8780e-01, -4.4139e-01,
        -3.4536e-01, -2.0208e-01,  4.3571e-01, -2.0765e-01, -9.1144e-02,
         1.8844e-01, -5.5023e-01, -1.2159e-01, -5.6095e-01, -4.5050e-01,
        -5.9225e-01, -3.5070e-01, -4.4801e-01, -5.6507e-01,  1.1281e-02,
        -2.5418e-01,  6.1902e-01,  1.0307e-01, -1.6173e-01, -7.3020e-01,
        -1.3068e-01,  7.4534e-02, -1.0271e-01, -1.2020e+00,  1.5149e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3910e-01, -1.0089e+01,  1.3936e+00, -5.7047e-02, -8.4083e-01,
        -4.0287e-01, -2.3252e-02, -6.9553e-03,  1.7020e-01,  3.3691e-01,
        -2.4981e-01,  1.7895e-01, -1.6301e-02,  1.8832e-01,  3.3388e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4175e+00, -2.3343e+01, -1.1069e-01,  7.7797e-01, -1.1069e-01,
        -5.4933e-01,  4.6416e-01,  1.2519e+00, -3.2254e-01, -9.5013e-02,
        -2.1349e-01,  3.9779e-02, -2.0319e-01, -1.6702e-01, -1.5611e-01,
        -1.9147e-01, -3.4779e-01, -1.9180e-01, -6.9382e-02, -9.1894e-02,
        -1.5405e-01, -1.2132e-01,  3.2988e-02,  1.1446e-02, -2.2985e-01,
         5.5463e-02,  2.7189e-01, -1.0775e-01, -2.8988e-02, -3.5091e-02,
        -4.4177e-02, -3.4834e-01,  5.5289e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1906e+00, -1.1072e+01,  8.6304e-01,  3.0453e-01, -6.7023e-01,
         6.0019e-01, -4.5951e-03, -2.2597e-01,  6.5961e-02,  2.5119e-01,
        -6.1495e-01, -2.9090e-01, -4.4395e-01, -3.6291e-02,  1.9369e-01,
        -8.0678e-02, -1.2858e-01,  1.0582e-01, -5.2371e-01, -3.1109e-02,
        -2.0597e-01,  2.1113e-01,  4.1687e-01, -2.4949e-02,  6.6616e-03,
         1.2986e-02,  8.3940e-02, -3.0778e-01,  2.7121e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.4198e+00,  2.5946e+01,  6.6362e-01, -6.0850e-01,  9.3894e-01,
         1.2286e-01, -6.5658e-01, -3.8751e-02, -3.5977e-01, -2.5801e-02,
        -4.1206e-01, -4.4944e-01, -3.6311e-01,  5.0489e-01, -3.0181e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6155e-01,  2.5339e+01,  3.7273e-01,  1.3712e-02, -4.7765e-02,
         6.5380e-02,  1.5406e-01, -7.9411e-02,  2.2292e-01, -3.4233e-01,
        -6.3968e-01, -1.2557e-01, -7.4931e-02, -1.8304e-02, -1.9174e-01,
        -1.0186e-01, -2.8400e-01, -2.6280e-01, -6.3127e-02, -4.3733e-03,
        -1.9102e-01, -5.6949e-02, -2.3080e-01, -9.3590e-02,  4.7565e-02,
         1.4637e-01,  2.9612e-02,  7.2269e-03, -2.4051e-01, -1.2026e-02,
         7.5708e-02, -1.3994e-01, -1.5410e-02, -3.5191e-02, -3.0234e-01,
        -6.1284e-02,  5.0955e-02,  2.2670e-02,  1.8377e-02, -1.6173e-01,
         1.0292e-01,  2.7466e-02, -1.4396e-01, -2.3495e-01, -4.2613e-02,
        -3.8830e-01,  6.2933e-03,  1.6646e-01,  4.2192e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4305, 14.0784, -0.5873, -0.6205,  0.4217,  0.4592, -0.2909,  0.1353,
        -0.0910,  0.4069,  0.0934, -0.1554,  0.1240,  0.0143, -0.1148, -0.2001,
         0.1186,  0.1039,  0.1364,  0.0637,  0.0203, -0.0142, -0.2427, -0.0929,
        -0.0296, -0.1559, -0.0451,  0.3700, -0.0379,  0.0332, -0.0465, -0.0367,
        -0.1191, -1.5027, -0.4323,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4735e-01, -3.6516e+01, -1.7504e+00,  1.3909e-03, -5.4727e-01,
        -1.9558e-01,  6.7695e-01, -8.4543e-01, -7.5769e-01,  4.4111e-01,
         2.7767e-01,  6.7790e-01,  2.1641e-01,  4.4135e-01, -7.7489e-01,
        -2.5872e-01, -4.7166e-01, -1.5046e-01,  1.0373e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7473e+00,  6.4020e+01,  2.1443e+00,  5.3110e+00, -1.6984e-02,
         2.0348e+00,  1.4993e+00,  1.9556e+00,  4.2269e-02,  1.7087e-01,
        -2.8173e-01, -2.1338e-01,  2.8512e+00, -2.2116e+00, -2.5947e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6983, 20.9156, -0.8789,  0.3474,  0.3278,  0.3109, -0.1467,  0.1005,
        -0.2843, -0.3266, -0.2825, -0.1128, -0.7224,  0.1371,  0.2503, -0.3154,
        -0.3211, -0.3407, -0.1339,  0.1616, -1.1188,  0.2034,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0851e+00, -1.5660e+01, -1.3485e+00, -6.1833e-01, -4.0566e-01,
        -4.1449e-01,  7.4770e-01, -1.0091e-01,  2.2309e-01, -1.6707e-01,
         4.6186e-01, -2.6197e-01,  5.0774e-01, -2.4308e-01, -1.0825e-01,
        -2.4655e-01, -1.9366e-02,  2.0829e-01,  1.5467e-01,  6.1679e-02,
         1.1895e-01, -5.5551e-01, -1.4365e-02,  2.0809e-01,  1.2506e-01,
        -1.1546e-01,  8.3867e-02, -2.4915e-01,  2.4130e+00, -2.6104e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6694, 25.1394, -2.4806,  2.8326, -0.0804,  2.5062, -0.6596, -0.0274,
        -0.5592,  3.0383,  2.2398,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6754e+00, -3.6725e+01, -1.0893e-01,  2.7475e-01,  1.3148e+00,
         1.2821e-01,  2.8472e-02, -3.9406e-02, -1.3798e+00, -1.0376e+00,
         4.0626e-01, -2.1814e-02, -1.7204e+00, -4.3988e-01, -8.6765e-03,
        -6.7825e-01, -3.8924e-01,  7.5718e-01, -9.4147e-02,  1.5363e-01,
        -3.0457e+00,  4.2789e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.2318, -45.0292,  -3.4733,  -0.1628,  -1.3388,   2.4539,  -0.1716,
         -0.7338,   0.1159,   0.5816,   1.8519,   0.6377,   0.8998,   0.7123,
          0.5434,   0.5415,   0.4133,   0.2667,  -0.7124,   0.6326,   0.1267,
          0.2963,   0.5436,  -0.2772,   0.5163,   0.2440,   0.7615,   0.4480,
          0.8745,   1.2104,  -0.5174,   1.5997,  -0.6671,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.2304, -25.9286,  -5.4857,  -0.1760,   0.6203,  -0.9587,   0.8696,
          1.0717,   0.2428,  -0.8517,   0.2113,  -0.2154,   0.9421,   1.7592,
          0.7459,   0.9752,  -0.3667,   0.5628,   0.5228,  -0.3247,  -0.4361,
         -0.1625,   0.2887,   0.0651,   1.2348,   0.0556,  -2.5510,   0.5036,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1914e+01, -3.6373e+01, -1.4074e+01, -1.4042e-01,  1.5573e+00,
        -1.3082e-01, -4.5193e-01, -3.7177e+00, -2.4979e-01,  5.3291e-01,
         4.0610e-01,  3.8757e-01,  4.7793e-01,  5.2451e-03,  1.5741e-01,
        -1.0852e+00, -5.7922e-02,  4.5566e-01,  4.0651e-01,  3.3757e-01,
         5.7491e-01, -5.3612e-01,  6.2198e-01,  2.8348e+00,  1.4214e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-6.2072e+00, -1.9836e+01,  1.1630e+00, -4.1288e-01, -2.9157e-01,
         6.8466e-01,  1.2343e+00,  6.5060e-01, -4.4717e-01, -7.1717e-01,
        -7.2336e-01,  5.3888e-01,  4.7218e-02, -1.3898e-02,  1.2268e-01,
         1.0268e-01,  3.1455e-01,  8.1963e-02,  6.3252e-01, -2.5075e-01,
        -5.8386e-01,  1.6193e+00, -7.2771e-01, -1.9203e-01,  2.2375e+00,
         4.1479e-01, -1.2660e-02,  1.1985e+00,  3.2268e-01,  1.0621e-01,
         6.0929e-01,  5.4376e-01,  4.9226e-01,  5.6872e-02, -1.8090e-01,
         5.4704e-01,  1.7137e-01, -1.7301e-01, -4.3229e-01,  9.5392e+00,
         3.7758e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.6727, -16.4398,  -0.8953,   1.1407,  -0.4058,   1.3512,  -0.0702,
         -1.1202,  -0.2999,   0.5227,   0.8329,   1.0904,   0.3660,   0.1245,
         -0.6820,  -0.0948,   0.4697,   0.1892,  -0.3654,  -0.2235,   0.1169,
          0.1495,  -0.0773,   0.4216,  -0.5470,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.3426, -19.0447,  -2.1972,   2.4839,  -1.6864,   1.1209,   0.9440,
          0.2828,   1.3000,   1.2631,  -0.8949,   0.3299,   0.0882,   0.4848,
         -0.2509,   0.0244,  -1.0734,  -1.7541,   1.4752,  -0.0563,  -3.8839,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6546e-01, -1.1375e+01, -7.2791e-01,  5.2367e-01,  2.5602e-01,
        -4.8725e-01, -1.0252e+00,  7.5410e-01, -2.7141e-02,  1.0875e+00,
        -3.0671e-01,  3.7504e-01, -4.3624e-01, -1.6135e-01, -1.4975e-01,
        -4.8551e-03,  1.9860e-01,  1.2203e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0652e+00, -1.3563e+01, -7.7852e-02,  3.8772e+00,  8.7146e-02,
        -1.8376e-01, -1.1629e+00,  9.6400e-01,  7.3345e-01, -5.1749e-01,
         3.8040e-01,  6.0570e-01,  3.3581e-01,  2.0932e-01,  3.6486e-01,
         1.3441e-01, -4.6499e-01,  3.8314e-01,  9.3983e-02, -1.7297e-01,
        -5.3136e-01, -4.3975e-01, -2.9393e-01, -1.3324e-01, -3.0228e-01,
        -2.3263e-01, -3.8396e-03, -9.1918e-01, -4.9457e-01, -3.4401e-01,
        -1.5375e-01, -9.2208e-02,  2.5150e-02,  2.1108e-01,  5.1960e-01,
        -7.7482e-01,  6.1889e-01, -1.1880e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8198, -8.8915,  0.3995,  0.3926, -0.0777, -0.4913, -0.0351, -0.9036,
        -1.4996, -0.7655,  0.4721,  0.1452, -0.1635,  2.0194, -0.8904,  0.1337,
        -0.6491,  1.4120,  1.7357, -0.6003,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1361e+00, -1.2658e+01,  2.6423e+00,  3.6321e-01,  1.9611e+00,
        -5.7506e-03,  7.2693e-01, -1.3898e+00, -8.8127e-01, -2.0852e-01,
        -1.6140e+00,  9.9521e-02,  1.2275e-01, -2.1906e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.8658, -13.9965,   0.3666,  -0.0434,   0.1799,  -0.1309,  -0.3216,
          0.4891,   0.3686,  -0.5256,   0.9511,   0.2029,  -0.0779,  -0.3688,
         -0.1107,  -0.2987,  -0.7447,  -0.3395,   0.6110,   0.0468,   0.2970,
         -0.1784,   0.0503,  -0.6571,   0.1215,  -0.1909,  -0.1154,  -0.0322,
         -0.3016,  -0.2692,  -0.2753,  -0.8290,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2562, -7.8833, -0.5026, -0.2252, -0.7771, -0.1034,  0.1577,  0.2306,
         0.0430, -0.3378, -0.1187,  0.3022,  0.0594, -0.2791,  0.1315, -0.0772,
        -0.0511, -0.5099,  0.3042,  0.9574,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2511e-01, -6.5384e+00,  3.7570e-01,  1.2768e+00,  6.6328e-01,
         5.4222e-01, -4.4655e-01,  2.8630e-01,  3.5570e-01, -1.0862e+00,
        -2.3643e-02, -2.3791e-01,  2.6649e-01, -3.7730e-01, -2.3746e-01,
         5.9604e-02, -9.1679e-02, -2.1918e-01,  2.3743e-01,  1.0245e-01,
        -5.0676e-01,  1.7804e-01,  4.2110e-03, -2.7737e-01, -3.0564e-01,
         3.9086e-01, -4.5242e-02,  1.3959e-01, -2.5424e-02, -3.6959e-01,
        -1.2246e+00, -4.8545e-01, -8.2369e-01, -3.1798e-01, -3.7748e-01,
         1.7874e-01, -7.7348e-01,  1.0245e-01,  2.6926e-01, -3.5107e-01,
        -4.4556e-02,  3.3460e-02, -8.1464e-01, -6.9550e-01, -5.9693e-01,
         1.7935e+00,  6.5606e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  6.6155, -34.0929,   4.1471,  -0.2450,  -1.6934,  -0.1316,   0.4529,
          3.5246,   0.5370,   0.7990,   1.2421,  -1.0257,  -0.8725,   1.9939,
         -0.7849,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3645e+01,  5.2888e+01, -5.9039e+00,  7.0383e+00, -1.8556e+00,
         3.9329e+00, -1.7206e+00,  1.8004e-01,  1.1567e+00,  3.1414e+00,
        -1.8825e+00, -9.4499e-01, -1.0079e-01,  4.2813e+00, -2.9350e+00,
         1.9699e-01,  3.2648e-03, -9.5353e-01,  4.9622e-01, -4.2744e-01,
         5.3047e-01,  3.6639e-01,  1.2168e+00,  1.0945e+00, -2.6369e+00,
        -1.0334e+01, -6.5497e-01,  1.7410e+00,  1.3730e-01, -4.7269e-01,
         6.1790e-01,  1.5223e+00, -1.6368e+00,  6.7281e-01, -3.2216e+00,
        -1.5190e+00, -1.5014e+00, -1.7963e+00,  7.2172e-01, -9.4590e-01,
        -7.4890e-02, -3.4283e-01, -3.6285e-01,  1.2759e+00,  1.2836e-01,
         9.1991e-01, -4.9900e+01, -9.1786e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-1.0322e+00,  2.0756e+01,  1.2218e+00,  3.7846e-01,  3.7013e-01,
         1.0361e+00, -1.6501e-01, -1.4187e-01,  1.6540e-02,  1.0163e-01,
         9.5635e-02,  7.0716e-01,  1.2494e-01,  5.6378e-02, -1.8869e-02,
         7.1786e-02, -1.4294e-01, -1.5755e-01, -4.8905e-02, -3.1243e-02,
        -6.7208e-03,  4.6643e-02,  1.8416e-01,  1.5107e-01,  3.0509e-01,
        -8.3927e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0030e+00,  7.2506e+00,  2.9419e-01,  6.5702e-01,  4.6575e-01,
        -8.0996e-02, -2.6048e-01,  5.6965e-02,  5.2267e-01, -2.2325e-02,
         1.9134e-01, -3.2267e-03,  2.1278e-02,  2.9200e-02, -1.4938e-01,
         8.5896e-02,  1.7732e-01, -3.2105e-01, -1.4025e-01,  6.9543e-01,
         1.9339e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3361e-01,  2.1261e+00,  9.4072e-03,  5.0298e-03, -1.5320e-02,
         1.1289e-02, -3.0979e-02,  3.9003e-02,  9.4372e-03,  2.4485e-02,
        -1.3681e-02, -1.5522e-02, -2.0648e-02, -3.3847e-02, -1.1645e-02,
        -6.3263e-03, -4.6712e-02,  6.9391e-03, -6.1001e-02, -3.5674e-02,
        -2.0505e-02,  5.8434e-03,  7.5509e-02, -2.1724e-02,  2.0940e-02,
        -7.4697e-03, -5.1208e-02, -1.3341e-02,  8.9421e-05,  3.3884e-02,
        -2.8043e-02,  1.2991e-02,  2.1130e-02,  6.8037e-03, -6.0832e-02,
        -1.4159e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4445e-01,  5.3764e+00, -7.3049e-03,  2.5966e-01, -9.0891e-02,
         5.0367e-03,  7.7464e-02, -3.9458e-01,  1.8200e-01,  3.0078e-02,
        -3.2697e-02,  1.2960e-01,  6.3162e-02, -4.4632e-02, -1.6730e-02,
         1.4825e-01, -1.3511e-01, -8.4808e-02,  1.8314e-01, -4.3149e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0178, 10.4661,  0.3481, -0.0437,  0.4046,  0.0276,  0.2752,  0.1037,
         0.3871, -1.1574,  0.3524, -0.2906, -0.1025, -0.0767, -0.3438, -0.0478,
        -0.2468, -0.2045,  0.1390, -0.5769, -0.3262,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8150, 14.8397,  0.6571, -0.2020, -1.2208, -0.0593,  0.3581,  0.0755,
         0.2590,  0.0836,  0.1544, -1.1804,  0.2247,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1903e+00,  1.4053e+01,  6.2413e-01, -1.2057e-01, -4.2368e-01,
        -4.2677e-01,  1.4110e-01, -8.4613e-02, -4.6620e-01,  1.8001e-01,
        -3.6752e-01, -1.7849e-01, -1.2008e-01, -9.7537e-02, -1.0129e-01,
         2.3027e-01, -2.0217e-01, -1.2064e-01, -1.7235e-02,  4.2560e-02,
        -1.2277e-01, -1.1175e-01,  7.5236e-02,  1.4383e-01,  1.0284e-01,
        -1.1270e-02, -3.6915e-02,  7.0835e-02,  5.7067e-03, -2.8125e-01,
        -6.8616e-02,  6.6359e-02,  4.1907e-02, -5.1967e-02, -1.4885e-01,
        -8.9995e-01, -6.5263e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1541e-01,  1.4911e+01,  5.9736e-01,  2.8823e-01, -7.1439e-01,
        -1.2710e-01, -1.0647e-01, -1.0656e-01, -2.5401e-01, -7.7959e-02,
        -1.5708e-01, -7.8789e-02,  3.8495e-02, -4.3432e-02, -3.2616e-01,
        -5.6861e-02, -6.9077e-02, -6.7665e-03, -7.1679e-02, -4.4128e-02,
        -1.9927e-01, -6.7299e-02,  2.9741e-01,  8.5167e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1712, 18.2732, -0.1028, -0.4261, -0.5166, -0.2496, -0.1134, -0.2973,
        -0.7666, -0.1390, -0.4330, -1.2351, -0.3507, -0.1945,  0.3652, -0.3752,
        -0.2748, -0.2455,  0.0711, -0.2705, -0.0615, -0.1478,  0.0742,  0.0244,
         0.1455, -0.1195, -0.0550, -0.0486, -0.1419, -0.0192,  0.6046, -0.8821,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9043e-01,  8.5923e+00,  1.5757e-01,  2.3934e-01,  5.1235e-02,
        -9.9015e-02, -9.3777e-02, -1.2439e-01,  3.8555e-02, -1.2849e-01,
         3.7958e-02, -1.9430e-02, -4.7800e-02, -8.3773e-02, -1.9317e-02,
        -1.1455e-01, -1.6290e-02, -3.6741e-02,  9.5444e-03,  2.1226e-03,
        -3.7304e-01,  2.9456e-01, -9.0039e-03,  1.1227e-02, -1.4594e-01,
         1.6808e-01, -1.0549e-01, -5.3578e-02, -1.1671e-01, -5.1351e-01,
        -3.1305e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5869e-01,  6.9751e+00, -5.4310e-01,  5.2143e-03, -1.8669e-01,
        -2.2664e-01, -3.5155e-01,  1.5606e-02,  2.0197e-01,  1.9149e-02,
         8.8757e-02, -4.1971e-02, -7.9099e-02, -1.0111e-02, -7.5171e-02,
         2.2573e-02,  9.0647e-02,  1.2801e-01,  4.9482e-03,  1.1508e-01,
         3.9874e-01,  2.5271e-01,  1.4279e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4759e-01,  6.2302e+00, -1.2784e-02, -3.3161e-02,  1.2593e-01,
         9.6311e-03, -7.5800e-02, -3.6002e-01, -1.1227e-02, -4.6971e-02,
        -1.8096e-01, -8.6067e-02, -1.7196e-02, -1.7594e-01, -9.8910e-02,
        -2.4667e-02, -1.3350e-02, -4.6223e-03, -6.7329e-02, -1.0593e-01,
        -1.5047e-01, -7.4067e-02, -3.9016e-02,  5.1115e-02,  7.1539e-03,
        -1.7219e-01,  3.2651e-01,  9.6585e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.1038e+00, -2.8827e+01,  8.1636e-01, -1.7536e-03,  8.5714e-01,
        -4.0823e-01, -4.4357e-01,  8.8000e-01,  5.3242e-01,  8.2310e-01,
         4.3831e-01,  2.1372e-01,  2.9172e-01,  2.3492e-01, -5.7959e-02,
         3.6053e-01,  3.5393e-01, -7.0899e-01,  9.6128e-02,  8.2816e-01,
        -4.6567e-01, -6.4638e-02,  7.2302e-01,  4.0585e-01,  5.1797e-01,
        -1.7531e-01, -1.8001e-01,  9.3146e-01,  1.3484e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8991e-03,  2.2876e+01, -1.1307e-01,  4.8962e-01, -5.2092e-02,
        -4.9038e-01, -5.5240e-01,  7.7875e-02,  5.5326e-02,  4.0068e-01,
        -4.3643e-01, -3.3638e-01, -3.4116e-01, -2.2194e-01,  3.9262e-01,
         3.4996e-01,  5.2496e-01,  5.0402e-01,  4.5481e-01, -8.2509e-01,
        -5.3999e-01, -7.3858e-01,  1.6689e-01, -5.5316e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0249,  1.2984,  0.0287, -0.0093,  0.0613,  0.0692,  0.0045,  0.0293,
         0.0076, -0.0127,  0.0113,  0.0407, -0.0052, -0.0300, -0.0170, -0.0060,
        -0.0201, -0.0043, -0.0306,  0.0158, -0.0068, -0.0123, -0.0051,  0.0040,
         0.0291, -0.0166, -0.0058,  0.0061, -0.0224, -0.0081, -0.0328, -0.0124,
         0.1591, -0.0897], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0908e-01,  8.0923e+00,  3.3947e-02,  8.5432e-02, -2.3889e-01,
         2.0772e-01,  2.6132e-01,  2.7806e-01, -4.7257e-01, -2.5571e-02,
        -2.3254e-03,  9.1739e-03,  1.6166e-02, -5.3504e-02, -6.8640e-02,
        -1.0968e-01, -1.2568e-01, -4.6814e-03,  9.4964e-02,  2.1406e-01,
         1.8593e-01,  1.7242e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4664e-01,  6.9369e+00, -3.0441e-02, -2.2198e-02,  1.3351e-01,
        -3.3456e-01, -2.2933e-01,  2.3137e-01, -3.1661e-01, -1.0272e-01,
        -4.0525e-02, -6.6574e-02, -1.0128e-04,  8.7151e-02, -3.2835e-02,
        -7.3728e-02, -6.6615e-01, -1.2724e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2778e-01,  1.4046e+01, -5.2801e-02,  2.2826e-01,  7.9345e-01,
         9.9800e-02,  7.2903e-03,  3.3007e-01,  2.0213e-01,  1.4729e-01,
        -1.5690e-01, -5.7138e-02, -2.8746e-01, -3.2502e-01,  2.7922e-02,
        -1.6296e-02,  1.2090e-01, -9.5956e-02,  2.4119e-01, -7.5264e-02,
        -9.0976e-02, -1.3003e+00, -5.5711e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5014e+00,  1.2763e+01,  3.0297e-02, -3.8287e-03, -4.3401e-01,
        -5.2979e-03, -9.0962e-01,  2.0043e-01,  7.4175e-01,  1.7594e-01,
        -3.3055e-01, -3.0464e-02, -1.5656e-01,  4.0074e-01,  6.9356e-01,
         3.6355e-01,  1.1459e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8266e+00, -3.3630e+01, -1.2670e+00, -1.6358e+00, -4.3426e+00,
         1.0726e+00,  2.5679e+00,  2.4511e-01,  1.3618e+00,  6.8741e-01,
        -6.5585e-01,  1.0654e-01, -6.3812e-01, -2.3105e-02, -3.9854e-01,
         1.1996e+00, -2.8214e+00, -7.5379e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8491,  9.6263, -0.7944, -0.4172, -0.1142, -0.0796,  0.0313,  0.0414,
        -0.1543,  0.4113,  0.3160, -0.0449, -0.3724, -0.1718, -0.2143, -0.0311,
         0.0982, -0.1515,  0.3009,  0.1090,  0.2061,  0.0595,  0.0451, -0.2269,
        -0.0390, -0.6672,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8230e+00,  1.2289e+01,  1.5762e+00, -8.8434e-01,  4.9094e-01,
         2.9741e-02,  9.6214e-02,  1.3663e-01,  5.7760e-02, -5.1366e-01,
         3.1126e-02,  2.8909e-01, -1.0593e-01, -3.6282e-02,  4.1607e-02,
         1.7460e-01, -9.0075e-02, -2.9871e-01,  1.2004e-01, -1.3816e-01,
        -9.5207e-02,  3.3078e-02,  2.8374e-01, -1.5485e-01,  4.9743e-03,
         7.9696e-03, -5.9609e-02,  1.2386e-02, -1.0865e-01,  9.5497e-01,
        -5.4847e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.3137, -33.1260,  -0.7844,   2.5558,   1.2288,  -0.4751,  -0.6423,
         -1.6782,  -0.2319,   1.1585,   0.1671,  -0.1523,  -1.7813,   0.0943,
         -0.6118,   3.9205,   1.1956,   2.1685,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3011, 10.7279, -0.6561, -0.1929, -0.3370,  0.3288,  0.1044,  0.1623,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 6.3117e-01,  9.5505e+00,  4.4129e-01, -1.4119e-01,  3.7090e-02,
         2.2211e-01, -1.2184e-01, -1.0046e-01, -6.5685e-02, -3.4223e-02,
         4.0923e-03,  8.3503e-02, -5.1693e-04, -3.3859e-03, -4.2051e-02,
         1.8619e-01,  5.6700e-02,  5.4207e-02,  7.5525e-02, -4.6441e-02,
         2.6233e-01, -1.3809e-01,  2.8237e-02,  3.8781e-03, -4.1212e-02,
         3.1173e-02, -8.9571e-02, -2.3300e-02, -5.8657e-02, -6.0506e-03,
         2.0863e-02, -3.4423e-02, -4.0182e-02,  5.3331e-03,  2.3034e-02,
        -1.0887e-01, -2.8146e-02, -1.1911e-01,  1.2242e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5213e-01,  3.0474e+00, -1.2050e-01,  4.5547e-02,  1.2141e-01,
         2.4125e-02,  3.6312e-02, -8.3750e-02, -1.0836e-01,  4.9741e-02,
        -2.5409e-02,  3.3790e-02, -7.3755e-03,  4.4144e-02,  3.9220e-02,
         3.6464e-02, -3.5713e-04, -7.3839e-02, -7.0279e-03,  2.5357e-02,
        -1.6004e-02,  5.7789e-02, -5.4423e-02,  1.1935e-02, -6.8078e-02,
         3.6430e-02, -4.8878e-02,  1.6400e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.2819, -24.7624,  -0.3397,   1.4960,  -0.2690,   0.1634,   0.1710,
          0.2315,  -0.2741,  -0.4279,  -0.2480,   0.8702,   1.5167,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0081,  7.6576, -0.3086, -0.0420,  0.1388, -0.1879,  0.0530,  0.1115,
        -0.1016,  0.1918, -0.0270, -0.1144,  0.0870,  0.3629, -0.4883,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6858e-02,  1.8519e+00,  1.3148e-01, -2.1101e-01,  5.0723e-02,
         3.5615e-02, -2.8490e-03, -2.8881e-02, -1.0485e-02,  2.9520e-02,
        -3.7915e-02,  2.8155e-02,  3.0876e-03,  2.5227e-02,  2.7707e-02,
         9.8785e-03, -1.9855e-02,  1.3142e-02, -1.2359e-02, -7.2107e-03,
         1.5467e-03,  2.1469e-02,  2.2466e-02,  1.8720e-03,  5.2991e-02,
         5.3719e-02,  1.7646e-02,  2.8548e-02, -1.1685e-02, -6.9690e-04,
        -5.7053e-02,  1.7922e-02,  2.4253e-02,  2.5221e-03,  3.4665e-02,
        -6.7967e-03, -3.1530e-02,  5.9575e-03, -2.2178e-02, -1.2944e-02,
        -1.6681e-02, -1.6956e-02,  7.7669e-03,  2.6287e-04,  3.7687e-02,
        -6.6971e-02, -2.0281e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9217e-01,  7.0305e+00,  2.3718e-01,  9.8684e-02,  3.8364e-02,
        -4.4473e-03,  7.4100e-02, -1.2373e-02,  2.0500e-01, -9.2380e-03,
         1.6318e-01, -1.5426e-02, -3.9079e-02, -6.3698e-02, -2.3373e-02,
        -6.5820e-02,  1.6994e-03, -6.2386e-02, -4.3684e-02, -4.6697e-02,
         4.8916e-03, -1.9166e-02,  4.8935e-02,  2.1186e-02, -2.5467e-02,
         1.0817e-02,  8.3510e-02,  3.0844e-02, -5.2802e-02,  9.0731e-02,
        -3.0060e-02,  4.9541e-03,  8.3435e-02, -3.0757e-02,  2.9451e-02,
        -4.7702e-02,  2.7432e-02, -1.2819e-02, -5.6535e-02, -2.5455e-03,
        -7.9590e-02, -4.7247e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1107,  3.3363,  0.1679,  0.0268,  0.1228,  0.1579,  0.1250,  0.0327,
        -0.0254,  0.1532,  0.0687,  0.1576,  0.0310, -0.0210,  0.0715,  0.0999,
         0.1063,  0.0478, -0.0225,  0.0129, -0.0126, -0.0078, -0.0137, -0.0514,
        -0.0106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0060,  9.1256, -0.0842, -0.1488, -0.0210, -0.0444, -0.0606,  0.0181,
        -0.0918, -0.0772,  0.0249,  0.0690, -0.0258, -0.0276, -0.2064, -0.1773,
         0.1120, -0.2706,  0.0542, -0.0582, -0.1918, -0.1431, -0.3269,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8234e-01,  2.5801e+01,  1.6540e+00, -6.0354e-01, -5.1664e-01,
         6.7491e-01, -1.0268e-01, -1.7633e-01, -1.3839e-01,  1.0265e+00,
         1.8598e-01,  4.5488e-02, -1.7340e-01,  7.9185e-01, -1.2023e+00,
        -1.0504e-01, -4.0132e-01,  9.1338e-02,  1.9785e-02, -2.5763e-01,
         2.4267e-01,  6.6319e-01, -8.6526e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0798, 18.2469,  1.7292, -1.1694,  1.0230,  0.0773,  0.3004,  0.1748,
         0.4640,  0.1461,  0.8462,  0.9233,  1.7422,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.0232, -14.1598,   0.3200,   0.1625,   0.8686,  -0.0926,   0.3570,
         -0.1555,  -0.1593,  -0.5908,  -0.2180,   0.6941,   0.0438,   0.7788,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8554e-01,  1.4198e+01,  6.4443e-01, -1.7390e-01, -1.8200e-02,
        -5.7651e-02, -8.4601e-02, -5.3099e-02,  1.5356e-01, -2.9422e-01,
         2.5184e-03,  3.1533e-01,  1.0429e-01, -1.3264e-01,  2.2587e-01,
        -1.4612e-01, -3.9295e-02, -2.6825e-01,  4.0705e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.6982e-01,  2.8816e+00, -4.1384e-01, -1.2830e-01,  8.3582e-02,
         4.1128e-02, -3.2081e-02,  8.2025e-02, -8.9750e-02,  3.8975e-02,
        -3.3520e-02,  7.7343e-04, -7.9699e-03,  2.0661e-02, -5.6224e-02,
        -5.2244e-02,  3.0885e-02, -3.7496e-02, -7.7663e-03,  5.5699e-02,
         1.2414e-02, -4.9859e-02, -3.9800e-02, -4.1821e-02,  1.3447e-01,
        -4.4586e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9598, 18.3926,  0.8383,  0.2023, -0.1075,  0.1252,  0.2393, -0.2051,
         0.0975, -0.7306,  0.4715,  0.2199, -1.0516, -1.6685, -0.1401, -1.2972,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2613e-01,  1.2501e+01, -1.0896e+00, -2.7408e-01, -1.9007e-01,
         5.1282e-01,  5.7599e-01,  1.0571e-01,  5.8371e-01, -4.9592e-01,
        -4.9047e-02, -1.4634e-01, -1.2771e-03,  3.4461e-01, -4.7958e-01,
         7.9702e-01,  5.0098e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5006, 11.9844,  0.9463, -0.7045,  0.5996, -0.0122, -0.0809, -1.4422,
        -0.0643,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3711e+00, -1.8641e+01, -2.8158e+00,  5.9227e-01,  7.1045e-01,
        -1.8334e+00, -9.9111e-01, -4.5702e-01,  7.2006e-01, -2.8227e-01,
         4.7209e-01,  4.3803e-01, -2.0231e-01, -2.2327e-01, -4.4261e-01,
        -5.3413e-01, -4.5006e-01, -5.1976e-01, -1.0169e-01, -3.5997e-02,
        -4.3702e-01, -5.4300e-01,  1.2360e-01, -1.4715e-01, -6.6319e-01,
         5.3480e-01, -7.6691e-02,  8.4879e-02, -3.2108e-01, -3.2336e-03,
        -8.6567e-02, -2.3806e-01, -1.1285e-01,  2.3537e-01, -1.4277e-01,
         1.5980e-01,  1.0050e-01, -3.4595e-01,  1.5629e-01,  2.4246e-01,
         2.3519e-01,  3.1498e-01,  4.5031e-01,  7.3641e-01, -6.7772e-02,
        -1.1658e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7395e-01, -3.6287e+01, -2.9097e+00,  1.2612e+00, -6.2576e-01,
        -2.2346e-01, -8.5065e-01, -6.3009e-01,  5.6965e-01, -2.6947e-01,
         1.2751e-01,  2.4764e-02, -7.5657e-01, -5.5886e-01,  2.7476e-01,
        -9.5488e-01, -2.0012e-02, -6.0376e-01, -9.5142e-02, -5.1492e-01,
        -3.6039e-01, -3.6392e-01,  5.4827e-02, -1.8115e-02, -2.0213e-01,
        -1.7640e-01, -2.5317e-01, -1.0536e-01, -2.3552e+00,  5.2396e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5697, 48.7459,  0.9806,  0.1465,  0.9462,  3.4137,  0.8880,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3087, 29.8000, -0.9721, -2.0518, -0.3062, -0.5085,  0.8321,  0.6157,
         1.0342, -1.2065, -1.7144,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2629,  8.2176, -0.0486,  0.2797,  0.3054, -0.0962,  0.1080,  0.1098,
        -0.2238, -0.0908, -0.1918,  0.2189, -0.2649, -0.4286,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4495e+00,  1.5512e+01,  1.1600e+00, -4.3884e-02,  9.8621e-02,
        -3.8494e-02,  5.5844e-02, -9.6945e-02, -2.2456e-01, -2.1442e-01,
        -2.0752e-01,  3.4055e-01, -1.9537e-01, -3.9150e-01,  3.6438e-02,
        -2.2072e-03, -6.7922e-02,  5.8718e-02,  4.3112e-02,  2.1913e-02,
         6.4998e-02,  1.9398e-01, -1.2556e-01, -1.1381e-01, -8.3735e-02,
         7.0861e-03, -1.0263e-01,  4.1756e-02,  2.1388e-01,  3.7667e-01,
         5.1247e-04,  5.1281e-01, -2.4883e-01,  5.4965e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2245e-01,  5.1942e+00,  3.1822e-01, -6.8863e-03, -5.9399e-02,
         1.6494e-02,  7.6794e-02, -1.3281e-01,  7.4922e-02,  1.0151e-01,
         1.1541e-01, -2.4318e-02, -6.8990e-03,  4.7995e-02,  5.0768e-02,
        -1.1058e-02, -9.6630e-02, -9.1870e-02,  2.1597e-02, -6.5778e-03,
         8.2343e-02, -1.9847e-02, -1.0428e-02,  3.5339e-03,  8.5923e-02,
         1.6836e-02, -6.2677e-02,  4.1356e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2280,  8.7016, -0.0954, -0.3936, -0.2939,  0.1359,  0.0919,  0.0107,
         0.0588, -0.0733, -0.1762, -0.0697, -0.1900,  0.4010,  0.2235,  0.0409,
         0.1192, -0.1301,  0.0812,  0.0114,  0.0997,  0.1535, -0.4729, -0.3517,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-6.9467e-02,  3.1553e+00,  4.1926e-01,  2.6039e-01,  1.2145e-01,
         1.0938e-01, -1.9920e-02, -3.1071e-02,  2.2076e-02,  1.1546e-02,
        -1.4283e-01,  5.6816e-03, -4.1087e-02, -1.0746e-01,  7.5614e-02,
         6.0774e-02, -5.9028e-02,  3.0166e-02,  7.8475e-02,  7.0278e-03,
        -1.9927e-03,  3.0107e-03,  6.7735e-02, -7.5508e-03, -7.7149e-02,
        -6.4265e-02, -4.0400e-02, -2.4336e-02,  3.7649e-02,  7.4478e-01,
         1.4308e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2769e-01,  8.1716e+00,  2.3577e-01, -3.5387e-01,  2.1923e-01,
         8.4776e-02, -7.2389e-02,  1.6250e-01,  4.7736e-02, -1.1136e-02,
         4.1761e-03,  2.7757e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5640e+00,  1.9748e+01,  1.1952e+00,  8.5132e-01,  9.0004e-01,
         7.9363e-01, -5.9082e-02,  2.9861e-01, -1.1306e-02,  3.9823e-01,
         7.4866e-01, -8.8036e-01,  5.6523e-01,  1.0045e+00,  6.0022e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0852e+00, -1.8096e+01, -6.0679e-01,  2.2629e-01,  1.5946e-01,
        -9.9853e-02, -3.9016e-01, -2.9841e-01,  1.8539e-01,  2.9506e-02,
         2.5117e-01, -1.7962e-01, -2.6208e-01,  2.0276e-01,  1.9092e-01,
        -1.0120e-01, -9.0644e-03,  2.3981e-01, -2.8521e-01,  2.5173e-01,
        -2.2603e-02,  8.9624e-02,  1.1344e-01,  1.1125e-02, -2.3327e-01,
         1.9975e-01, -7.7332e-02,  6.1314e-02,  2.1449e-01,  2.1072e-01,
         6.0441e-02,  2.6066e-01, -4.1320e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.0244, -11.7933,  -1.4316,   0.6561,  -0.2680,  -0.0416,   0.2919,
          0.1447,   0.1063,  -0.0416,   0.4638,   0.0370,   0.2931,  -0.2221,
          0.2614,   0.6771,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6911, 34.6587, -0.4489,  2.6611,  1.7764,  1.2556,  1.4678,  0.3950,
        -0.6847,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1760,  3.5112, -0.1839, -0.1219, -0.0661,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4190, 13.0961, -0.3108, -0.6985, -0.7465, -0.2632,  0.1126, -0.2529,
         0.1738, -0.0780,  0.2218,  0.2305,  0.2119,  0.2676, -0.0238, -0.0344,
         0.3060, -0.0596,  0.1771, -0.1653,  0.0355,  0.1340, -0.0400,  0.1246,
        -0.1318, -0.3560,  1.2562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4385, 42.1074,  1.1837, -0.4463,  4.6205,  1.8318,  0.2165,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1632, 27.4899,  0.5064, -0.7011, -0.4159, -0.2050,  0.2191,  0.5266,
        -0.6965, -0.1976, -0.0691,  0.5002,  0.3899,  0.2648,  0.6959,  0.4767,
         0.4305, -0.7801,  0.3813, -0.0979, -0.2957,  0.5426,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0467e-01,  1.0914e+01,  8.8131e-02, -1.0198e-01, -2.1180e-01,
         2.6645e-01,  8.4407e-02, -1.0761e-02, -5.9862e-02, -2.5937e-01,
        -7.0042e-02,  2.6173e-01, -3.1354e-01,  1.6202e-01, -5.8210e-02,
         2.2782e-01,  4.4596e-02, -9.4703e-02, -6.6807e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0017e-01,  1.0373e+01,  3.6774e-01, -1.9144e-01, -8.7649e-02,
         1.3994e-01, -4.0551e-01,  1.0461e-01, -1.5767e-03, -5.9074e-02,
        -4.4441e-02, -1.9315e-02, -7.1796e-02, -8.8978e-02, -4.3067e-02,
        -7.4660e-02,  2.4051e-01, -1.6092e-02, -7.4609e-02, -3.4167e-01,
         5.9643e-03,  1.1188e-01, -1.8581e-02,  9.8474e-02,  1.5227e-01,
        -7.9879e-02, -1.3582e-02,  2.1051e-02,  3.1788e-03, -6.6778e-01,
        -2.2783e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.5238e+00, -1.3491e+01, -3.0321e+00, -9.3453e-02, -8.5033e-02,
         7.5848e-02,  1.4830e-01, -5.2680e-02,  4.5846e-01, -9.7734e-03,
        -9.4368e-02, -1.6172e-01, -2.5680e-01,  5.1202e-02, -2.3038e-01,
        -1.5649e-03, -2.4979e-02, -1.3136e-01,  2.2046e-01, -2.8980e-01,
         9.4442e-03,  3.9805e-03,  3.8258e-02, -2.1694e-01,  6.2490e-02,
         4.7966e-02, -3.6311e-02,  4.7976e-01,  1.6936e-01, -4.1627e-02,
        -1.2889e+00, -3.5630e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3680, -3.3088, -0.1547, -0.0993, -0.0500, -0.0200, -0.0139,  0.0124,
        -0.0513,  0.0694,  0.2374, -0.0561,  0.0155,  0.0165, -0.0207,  0.0080,
        -0.0307, -0.0388,  0.0230, -0.0073, -0.1165,  0.1242,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7662e-01,  5.5166e+00, -4.9586e-02, -2.6039e-01,  2.9525e-03,
         7.7343e-02, -2.8232e-01,  5.7932e-02, -2.2576e-03, -7.7571e-02,
         3.1529e-02,  1.2829e-01,  7.7937e-02,  4.9607e-02, -1.1238e-01,
        -4.2719e-02, -7.3705e-02,  1.0383e-01,  4.9422e-02, -2.5231e-02,
        -5.4552e-02, -1.5509e-01,  3.0624e-02, -9.6531e-02, -1.0569e-01,
         9.7277e-02, -3.4510e-02, -5.1320e-02, -2.2241e-02,  1.7378e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0720,  2.7732, -0.1266, -0.0222, -0.0140,  0.0232,  0.0030, -0.0143,
        -0.0197,  0.0140, -0.0483,  0.0382, -0.0355,  0.0203,  0.0271, -0.0146,
        -0.0633, -0.0251,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3431e-01, -1.6497e+01, -2.0922e+00, -1.2779e-01, -3.8572e-01,
         5.0765e-01,  1.7854e-02,  8.7579e-03,  4.1715e-01, -2.5607e-01,
        -2.4658e-02, -1.4698e-01, -1.4642e-01, -1.6037e-02,  5.2551e-02,
         6.6795e-01,  5.9998e-02,  5.1514e-01,  4.4897e-02,  2.4993e-01,
         3.7189e-02,  2.1571e-01,  4.6963e-02, -2.0440e-02,  8.0407e-02,
         2.3717e-01, -1.3027e-01, -1.6828e-02, -2.5220e-02, -1.4992e-02,
         2.4802e-02,  7.1527e-02,  2.9105e-02,  1.4765e-01, -6.0054e-02,
         4.0033e-01,  3.9391e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1961e-01, -2.1550e+01,  5.0942e+00,  8.8143e-01,  4.6169e-01,
         2.3183e-01,  8.1318e-03, -5.2408e-01,  1.3402e-01, -3.3901e-01,
        -1.6799e-01, -1.0843e-01, -2.4210e-01,  1.9648e-01, -1.5452e-01,
        -2.1243e-01,  1.7435e-01, -4.6527e-01, -2.6283e-01,  6.2595e-01,
        -6.3989e-01,  4.4982e-02, -1.2062e+00, -1.1457e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4970e-01,  8.3883e+00, -1.8207e-01, -5.4975e-02,  2.5451e-01,
         8.8627e-02, -4.3175e-02,  5.8054e-02, -2.0858e-01, -7.1169e-03,
         8.0048e-02, -1.3192e-01, -1.3778e-01,  8.0757e-03,  4.2140e-03,
        -2.3529e-02, -2.1491e-05, -1.0168e-02,  1.2749e-02,  2.3773e-02,
        -4.9100e-02,  9.7163e-02, -3.9786e-02,  1.5029e-01,  1.9899e-01,
         6.6745e-02,  6.5245e-02,  6.6001e-02, -1.2069e-01, -1.1719e-01,
        -2.7919e-01, -9.3959e-03, -2.5307e-02,  5.0418e-02,  6.8487e-03,
        -1.5618e-02,  4.5586e-02, -7.5153e-02,  5.3920e-02,  6.6567e-03,
        -1.5557e-01,  8.0042e-02,  1.1579e-03,  1.7395e-01,  2.1161e-02,
         1.8151e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3499e-01,  9.2145e+00,  4.7615e-01,  3.6051e-01, -3.3725e-01,
         2.3238e-01, -1.6357e-01, -1.7021e-01, -9.3487e-02, -2.5677e-01,
         2.7593e-01,  1.3841e-01,  6.2431e-01, -1.6805e-03,  1.7534e-02,
         1.5831e-01, -8.8781e-02, -2.8646e-01,  7.6985e-03,  1.2146e-01,
         1.8836e-01, -2.5123e-01, -2.3688e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6588e+00, -3.5161e+01,  1.9269e+00, -9.0416e-01, -2.0160e-02,
         5.6200e-01,  5.3434e-01,  9.5147e-01,  1.2258e-01, -4.5112e-02,
         7.2831e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5649, 17.6467, -0.6868, -1.3164,  0.1289, -0.7958,  0.1281,  0.6901,
         0.6493, -0.0703,  0.4303, -0.1138, -1.1991, -1.2815,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3873,  7.6530,  0.7071, -0.6929, -0.1789, -0.1078,  0.1718,  0.1141,
         0.2312,  0.4184,  0.0932,  0.1865,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9257e-01, -2.7867e+01, -1.1223e+00, -5.8710e-01, -6.0679e-02,
         7.3160e-01, -1.3623e-01, -5.4826e-02,  3.2679e-01, -2.9176e-01,
        -1.7526e-01,  5.6893e-02, -1.8518e-01, -4.4831e-02,  5.4511e-02,
         4.7005e-02,  1.2515e-01,  1.8562e-01,  2.4025e-02,  5.4048e-01,
         1.3712e-03, -6.2493e-02, -1.3113e-02,  3.7687e-01,  1.6887e-02,
         1.0926e-01, -5.3315e-01,  6.8348e-02, -2.4577e-01,  2.8585e-01,
        -1.7324e-01, -4.1872e-01, -1.4133e-02, -9.1593e-03, -2.7454e-01,
        -1.5018e-01, -1.8608e-01,  2.0729e-01,  1.6462e-01,  1.9013e-02,
        -7.1987e-02, -9.5430e-02, -6.0493e-02, -4.2869e-02, -1.4627e-01,
        -1.1290e-02, -1.1679e-01,  4.9931e-02,  8.9443e-02,  1.6651e-01,
        -8.7270e-02, -5.0719e-02,  9.0552e-03, -1.5305e-01, -3.9213e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-5.1117, 45.6676,  1.1666, -0.0919,  0.2176,  0.1485, -0.4909, -0.5348,
        -1.0176,  0.5549, -1.0268,  1.6714,  1.0258,  0.2438,  1.2863, -0.6702,
        -0.2538,  1.1622,  0.2047,  0.7987, -0.2978,  1.7927, -0.5550,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6992e-01, -2.3526e+01, -2.1266e+00, -9.3622e-01, -6.9513e-01,
        -2.7509e-01, -3.6236e-01, -5.2916e-01,  3.6648e-01, -5.8827e-01,
         6.3656e-05, -1.5701e-01,  7.4722e-01,  3.9131e-01,  3.6917e-01,
         2.7275e-01,  1.0608e-01, -8.3842e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0194e+00,  1.5295e+01,  1.1031e+00, -1.5624e-01,  1.5509e-02,
         1.2677e+00,  1.5272e-01,  2.8131e-01,  5.0784e-01,  9.2364e-02,
        -1.1036e-01,  7.6373e-02,  2.3406e+00,  8.6077e-03, -1.4376e-01,
         1.5320e-01,  3.2086e-01,  9.0631e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7240, 32.5407,  1.2439,  0.0475,  0.8793,  2.0918, -0.2547,  0.7951,
         1.1585,  0.2716,  0.3177,  1.2317,  1.0691, -0.6779, -2.0595,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4658e+00, -3.2492e+01, -3.6891e-01, -1.3489e+00, -5.1956e-01,
        -4.1314e-01, -5.9542e-01, -9.1448e-01, -1.0883e+00, -2.8980e-01,
        -7.2537e-01, -4.0617e-01, -8.7092e-01, -2.3433e-01, -3.4416e-01,
         6.7712e-03,  1.2933e-01, -2.4649e+00, -9.7356e-02, -5.6958e-01,
         1.7819e-01,  2.1224e-01,  2.1192e-01, -3.1404e-01, -2.0540e-01,
        -5.9298e-01, -1.5833e+00, -3.9263e-01, -1.2436e-01, -9.7862e-01,
         1.0069e+00,  5.3268e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3212e+00,  2.4719e+01,  8.8046e-01,  6.9984e-01, -4.2327e-01,
         1.7854e-02,  8.9749e-02, -2.8414e-02, -8.5077e-02, -2.8153e-01,
        -5.0107e-01,  2.7574e-02,  1.2926e-01, -1.3758e-01, -8.5329e-02,
        -1.5415e-01, -2.1582e-02, -4.6349e-01, -5.5465e-01, -6.5575e-01,
        -2.6537e-01, -5.4694e-01, -6.2368e-01, -1.2460e-01, -3.4932e-01,
         4.0894e-02, -2.7327e-01,  3.7945e-01, -1.4586e-01, -9.5755e-02,
         4.8833e-01, -2.1815e-01, -4.0777e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6111e-01,  3.6052e+00,  3.0868e-01,  2.5558e-01,  1.3720e-01,
        -6.4362e-02,  2.3562e-02, -9.0242e-02,  1.1543e-02, -1.4924e-02,
         2.5982e-03, -6.2928e-02,  1.0839e-01,  2.4728e-03,  1.5591e-02,
        -2.1493e-02,  4.2964e-02, -2.3528e-02, -8.1414e-03, -1.5126e-03,
         2.5693e-03, -3.1590e-02, -9.2417e-03,  1.8013e-03,  2.7577e-03,
        -2.6094e-02,  2.5559e-03,  3.2728e-02,  2.7939e-02, -1.8978e-02,
         6.5411e-03,  4.3399e-03,  1.0597e-02, -5.0101e-02, -3.2067e-03,
        -9.0879e-04, -1.6820e-03, -1.1193e-02,  1.1093e-02,  2.4415e-02,
        -5.6392e-03,  1.0602e-02, -2.3555e-03,  5.4484e-03,  3.4507e-02,
         1.9259e-02,  9.8116e-04, -1.0307e-02,  1.9799e-02,  4.9898e-03,
         5.8316e-03,  3.4530e-02,  8.2559e-03, -1.1086e-02,  4.9483e-02,
         2.7587e-02,  4.5370e-02, -9.3878e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3156e+00, -3.6840e+01,  6.5323e-01, -6.9383e-01, -3.8179e-01,
        -3.6871e-01, -2.2297e-01,  1.7623e-02,  7.2557e-01, -1.5724e+00,
         2.4406e-01, -1.2196e+00,  3.6763e-01, -3.7302e-01,  4.4594e-02,
         3.2784e-01, -8.8058e-01,  5.5396e-01,  3.5238e-01, -5.9844e-01,
         1.5419e-01,  3.0609e-02,  1.0235e+00, -7.0249e-01, -1.4493e+00,
         4.3836e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1791e-01,  2.0971e+01,  6.0493e-01, -6.0969e-01,  4.3713e-02,
         4.4040e-01, -9.8651e-02,  2.1105e-01, -3.1802e-01, -1.4883e-01,
         4.4185e-02, -7.6392e-02,  4.3743e-01,  3.9226e-01, -1.8758e-02,
        -3.7023e-02,  9.5558e-03,  3.3039e-01,  9.1473e-03, -1.2880e-01,
         5.3422e-02, -9.3728e-03, -8.9732e-01, -5.2854e-01, -5.6102e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7285e-01,  6.3092e+00, -2.7391e-01, -4.0907e-01, -2.3030e-01,
         1.0665e-01, -2.8661e-02, -1.5216e-01, -1.1430e-01, -3.0793e-01,
        -2.2939e-01, -1.6889e-01, -7.5633e-03,  2.0234e-01, -9.2629e-03,
         1.5052e-01, -1.2786e-01, -5.2153e-02,  1.8484e-02,  5.9894e-02,
         2.1857e-02, -4.0260e-02, -1.4229e-03,  7.4036e-02, -4.9310e-02,
         1.5545e-02,  2.0428e-02,  6.6819e-02,  3.9346e-02,  2.0991e-02,
         4.9231e-02, -5.8279e-02,  1.2654e-02,  4.4675e-02, -2.7167e-02,
         1.2455e-02,  5.3424e-02,  1.6153e-03,  1.1064e-02,  9.4812e-02,
         5.3737e-02,  1.3895e-02,  2.4142e-02, -3.0006e-02, -2.0883e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4657e+00,  1.5641e+01, -1.2417e+00,  2.8806e-01, -2.5210e-01,
         1.2993e-02, -1.8460e-01,  2.2458e-01,  4.8066e-01, -1.3098e-01,
         2.1939e-01,  1.8951e-01, -1.7175e-01,  8.1812e-02, -2.7809e-01,
        -3.0409e-01, -9.8035e-01, -2.8355e-01,  3.6925e-01, -3.8388e-02,
        -2.9960e-01, -5.5725e-01, -1.3822e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5784, 10.0668, -0.3262, -0.4279,  0.1134,  0.3177,  0.4050,  0.4745,
         0.2703,  0.2640, -1.0469,  0.7691,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.2193,  9.9781,  0.2743, -0.2947, -0.2645,  0.0494,  0.0568, -0.2438,
         0.2218, -0.2843,  0.2849, -0.0863, -0.1022,  0.3571,  0.0707,  0.1747,
         0.0690, -0.1249,  0.0370,  0.2185,  0.0575, -0.0583,  0.4031,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0817e-01, -2.6734e+01, -9.2091e-01,  3.7811e-01, -7.0957e-01,
        -1.0474e-01,  5.6255e-01, -4.0223e-01, -4.9921e-01,  6.3806e-01,
        -1.9099e-01, -7.3502e-02, -4.3982e-02, -3.9191e-01,  1.9446e-01,
         1.8257e-01,  2.9906e-01,  4.8384e-01,  8.6961e-02,  4.9258e-02,
         1.6659e-01, -2.4273e-01,  5.2939e-01,  4.0378e-01, -4.2216e-04,
         1.6403e-01,  3.5114e-01,  1.5085e-01,  5.4153e-01,  1.8150e-02,
        -3.8463e-01, -1.3007e+00, -1.3812e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1698, 35.3169,  1.5037, -0.4190,  0.2131,  0.2081, -1.3574, -0.2021,
         0.2648, -0.6476,  0.2882,  0.6764,  0.2842, -1.0679,  0.0689, -0.5820,
        -0.1281,  1.2033, -0.7861,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8376e-01, -3.0848e+01,  2.4471e-02, -4.5885e-01, -1.0660e-02,
        -1.1001e+00, -1.4723e+00, -5.3387e-01, -3.9623e-01,  7.8729e-01,
         3.5848e-01,  6.7859e-01, -9.0246e-01, -1.0697e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5538e+00,  2.6848e+01,  6.9045e-01, -8.5385e-01, -2.8144e+00,
         8.7001e-01, -2.2801e-01, -6.1552e-01,  4.5809e-01,  4.7344e-01,
        -7.8292e-03, -3.5329e-01,  5.7801e-01,  2.0551e-01, -5.1648e-01,
        -1.5747e-01,  2.8689e-02, -9.9359e-02, -6.8246e-03,  3.3395e-01,
        -3.2852e-01,  1.6887e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1426e-01,  9.8100e+00,  2.0257e-01, -5.5395e-01,  9.8845e-02,
         1.9042e-01, -2.5652e-01, -1.7881e-01, -1.4135e-01, -3.3768e-02,
         2.4091e-02, -5.9914e-02, -2.2580e-01,  7.3907e-02, -4.2393e-03,
         1.6016e-01,  2.6555e-01, -4.0922e-02,  4.6562e-02,  1.2328e-01,
         4.7504e-02, -3.3660e-02,  3.0177e-02, -1.5055e-01, -7.3853e-02,
        -2.2385e-02, -1.8599e-02,  1.7727e-01,  1.6138e-01,  1.7922e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2671,  4.0065, -0.2413,  0.0354,  0.2882,  0.0222,  0.3456, -0.2601,
        -0.1373,  0.0518,  0.1484, -0.0099,  0.0308,  0.0611, -0.0609,  0.1782,
        -0.1409,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3425, 40.5635,  3.2068,  1.4874, -1.1749,  0.5966,  0.4294,  1.5947,
         0.9597, -0.4810, -1.3786,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6354, 30.2522,  0.7923, -0.8984,  0.4136, -1.4802,  1.0163,  0.2997,
         0.4468,  0.1782,  0.1776, -1.3205,  0.8158,  0.8688, -0.4955, -0.2384,
        -0.2910,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0867e-01,  1.0051e+01, -2.3682e-02,  4.4165e-01,  1.7222e-01,
        -7.8701e-02, -7.1489e-02, -4.6805e-02, -2.2918e-01, -7.0924e-03,
         2.3506e-01, -8.8001e-02,  5.1740e-02, -2.9127e-01,  2.7993e-01,
         1.9889e-01, -8.4061e-03, -1.5397e-01,  1.0466e-01, -6.1567e-02,
         4.9602e-02,  2.0990e-01, -1.7158e-02,  7.9827e-02, -6.0543e-01,
        -8.3803e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1776e+00,  9.7377e+00, -3.3074e-01, -1.2269e+00, -4.4266e-01,
        -1.1063e-01, -3.3509e-01, -7.2513e-02, -1.2246e-02,  1.6720e-01,
         1.0144e-01, -4.1217e-02, -4.4392e-02, -2.4815e-01,  6.8683e-03,
         1.8520e-01, -7.0027e-02,  5.7361e-02, -4.3528e-02, -2.7683e-02,
        -2.5644e-02, -8.2942e-02, -3.2275e-02, -2.5422e-01, -3.5135e-01,
        -2.6186e-02,  3.7127e-02, -1.1749e-01,  3.1362e-01,  5.0668e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.5586, -27.3956,  -0.7997,   3.4467,   1.4045,   0.5231,   0.0500,
         -0.7418,   0.1402,   1.4249,   0.1521,   1.2201,  -0.1030,  -0.0672,
         -0.2829,   0.5311,  -0.1069,  -3.7448,  -1.8967,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0931, 13.5881,  1.4635,  0.5845,  0.1643,  0.4053,  0.8016,  0.0146,
         0.2158,  0.0185, -0.0903, -0.0751, -0.0451,  0.0842, -0.4273, -0.1945,
         0.1978,  0.4324,  0.2551,  0.2538, -0.1209, -0.1228,  0.0171, -0.1570,
        -0.0364, -0.5399, -0.0929,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9917e-01, -2.3668e+01, -2.8698e-01,  1.8566e+00, -8.2689e-01,
         1.6963e-01, -1.9306e-01, -1.6195e-01, -2.8887e-04,  4.6317e-01,
         2.7850e-01, -8.4707e-01, -6.8558e-01,  1.7037e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2399e+00, -4.1238e+01, -1.1322e+00, -4.7231e-01,  5.8266e-01,
         1.0753e-01,  4.6003e-01, -1.2009e+00, -1.3253e-01,  2.1798e-01,
        -4.0448e-02, -1.6222e+00,  1.0180e+00, -7.8177e-01,  3.2269e-01,
        -1.4687e-01,  1.1201e-01,  4.1485e-01, -8.7035e-02, -8.7672e-01,
         3.3214e-01,  7.2920e-01,  1.2457e-01,  4.3731e-01,  7.1150e-01,
         2.6929e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5207e-02,  6.1074e+00, -1.3566e-01, -6.4599e-02,  1.7797e-01,
        -2.9721e-01,  2.5340e-01, -4.0770e-03, -1.2822e-01, -7.8709e-02,
        -1.8490e-01, -1.6449e-01,  4.2707e-01, -1.8520e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8474e-02,  9.1717e+00,  4.7123e-01, -6.3709e-02, -7.6042e-02,
        -6.8705e-02, -1.3483e-01,  2.8623e-02,  9.9396e-02,  4.3823e-02,
         1.1079e-01,  3.3283e-03, -8.0813e-03, -8.4517e-01,  2.0620e-02,
         3.1883e-02, -1.9554e-02, -1.1503e-01, -1.0186e-02,  4.5993e-02,
        -7.3769e-02, -1.9239e-02, -2.6734e-02,  6.7548e-02,  1.3972e-01,
         3.3229e-02, -2.9036e-02, -1.0542e-01, -1.7740e-01,  1.4249e-01,
         7.9915e-02,  2.7162e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9953e-01,  3.2971e+01,  3.2787e-01, -9.1012e-01,  1.1500e-01,
         3.4486e-01, -1.3846e-02, -1.2295e+00,  2.4827e-01, -4.4273e-01,
        -4.9968e-02, -2.6859e-01, -1.2398e+00,  1.9382e-01, -4.4609e-02,
        -4.5542e-01, -2.5546e-01, -6.9187e-01, -6.7599e-01, -4.2169e-01,
        -1.0264e-01, -6.3349e-01, -1.2495e+00,  4.6201e-01,  6.8990e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5726,  8.3600,  0.3807,  0.3854, -0.3403, -1.0554,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0273e+00,  2.9387e+01,  9.0430e-01, -1.3993e+00, -4.2520e-01,
        -2.2693e+00, -9.8456e-02, -1.1099e-01, -1.1823e-02, -1.0530e-01,
        -5.7353e-01, -5.0637e-01, -4.3012e-01,  1.1183e-01, -3.3622e-01,
        -2.8454e-01,  4.6345e-01,  1.4461e-01, -1.6443e-01, -9.9211e-02,
        -1.8547e-02, -1.8544e-01, -5.2088e-01,  3.5063e-02, -1.7512e-02,
        -2.9055e-02, -5.6081e-01, -4.9866e-02,  2.9059e-01, -6.2398e-02,
         1.4243e-01, -2.1616e-01,  2.6654e-01, -1.6917e-01, -1.6042e-01,
        -7.4015e-01,  3.7190e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0723e-01, -9.1096e+00, -2.5838e-01, -3.9242e-01, -1.3806e-01,
        -8.4683e-02, -4.6131e-02,  7.0885e-02,  1.6651e-02,  3.6619e-03,
         5.0587e-02,  6.1874e-02,  7.0790e-02,  1.4039e-01, -2.6019e-02,
         1.8169e-01,  6.3107e-02, -6.8222e-02, -6.8398e-03,  4.9674e-03,
         1.5804e-01, -3.4640e-02,  5.1777e-02, -7.8112e-02, -9.5131e-04,
         2.3662e-02,  1.3230e-02, -8.2708e-02,  8.8116e-02,  6.6459e-02,
         7.6606e-02, -2.7136e-02,  1.9571e-02, -1.3231e-01,  2.2794e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3439e-02,  8.1765e+00, -1.6479e+00, -6.6381e-01,  2.7714e-01,
        -4.8174e-03,  4.9733e-03, -2.6169e-01, -2.3022e-01,  4.9023e-03,
        -2.4981e-01, -5.5820e-01, -1.8903e-01,  8.4045e-01, -5.0331e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2243e-01, -7.6128e+00, -3.2359e-01,  3.6402e-01,  6.0845e-02,
         1.9290e-01,  9.6603e-02, -5.6826e-02,  1.6090e-01, -2.9890e-01,
        -9.3915e-02,  1.7164e-01, -2.2423e-02,  4.9373e-02, -4.3370e-02,
        -1.3959e-02,  4.9480e-02,  4.5287e-01, -1.0926e-03,  1.1229e-01,
         1.7421e-01,  9.6476e-02, -1.9265e-03, -5.4806e-02,  1.4553e-02,
         3.5776e-02,  8.9756e-02,  5.2142e-02,  2.0640e-01, -1.1593e-03,
         4.4087e-02,  8.5576e-01, -7.8090e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9267, 67.2518, -0.1202,  0.8807,  0.0874,  0.5113, -0.4854,  1.1869,
         2.0417,  0.1493, -0.2622, -1.2135,  0.7982,  0.2949,  0.2079, -0.6320,
         0.7162,  0.1469, -0.9293, -0.8028, -1.1230, -0.8388, -0.4317,  0.3226,
         0.2301, -0.1758,  0.1642,  1.1169,  1.9656,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 7.7887e-01,  1.6406e+01, -9.9660e-01, -7.7704e-01,  4.7704e-01,
        -9.6644e-02, -1.8250e-01,  4.5778e-04,  4.2507e-02, -7.5539e-01,
        -3.6642e-01,  1.0320e-01, -9.7952e-02,  8.8879e-01, -4.1177e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5357e+00,  1.2483e+01,  1.2133e-01,  6.4060e-02, -2.3043e-02,
         1.0364e-01, -2.3452e-01, -2.3214e-01,  2.2170e-01, -1.6098e-02,
        -2.9442e-02,  1.1065e-02, -5.1018e-02, -1.7747e-01,  1.0110e-01,
        -4.7764e-02, -8.6853e-02, -2.4204e-01,  4.2148e-02, -2.9709e-02,
         1.5559e-01, -4.2733e-03,  9.7026e-02,  1.5038e-01,  9.0731e-02,
         5.3224e-02,  2.8455e-02,  2.7967e-01,  1.5882e-01,  9.7526e-03,
        -9.2979e-02, -5.1975e-02, -9.7712e-03, -9.7315e-03,  9.6209e-02,
         1.6556e-01,  2.3924e-02, -1.6112e-02, -1.8904e-02, -3.5242e-02,
         8.0056e-02,  5.1846e-02,  1.6945e-02,  2.6618e-02,  3.7194e-02,
        -2.1796e-01, -9.4371e-02,  5.5901e-03,  2.4021e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8557e-01,  1.3951e+01, -1.3299e-01,  4.5898e-02,  1.8296e-01,
         1.2781e-01,  1.5621e-01, -1.9217e-01, -4.6415e-02, -3.0202e-01,
         1.3709e-01, -5.8234e-02, -1.0843e-01,  1.3484e-01, -2.6633e-02,
         9.6256e-02,  7.4056e-02, -7.4070e-02,  4.0658e-03, -3.6056e-01,
        -4.1181e-01,  1.2271e-01, -6.4239e-02,  1.9462e-02, -1.0730e-01,
        -1.0091e-01, -2.8607e-01,  1.1438e-01, -1.7282e-02,  7.6834e-02,
         1.9262e-01,  6.0103e-02, -3.5149e-02, -8.0769e-03, -4.2532e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1909e-01,  2.3327e+01,  1.4293e+00, -3.6526e-01, -2.1567e-01,
         1.5871e-01,  2.7351e-01,  8.8010e-02, -5.0295e-01, -9.1430e-02,
        -2.6995e-01, -2.2823e-01,  3.2229e-02, -9.0574e-03, -4.6896e-02,
        -2.1800e-01, -3.8441e-01, -1.9344e-01,  5.1745e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5246, 16.5680,  0.3932, -0.1309, -0.0472,  0.2572,  0.2236, -0.0282,
        -0.2914, -0.0631, -0.2463, -0.8581, -0.0864, -0.5087,  0.1253,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.4355, -39.0274,   0.6694,  -1.3569,   1.2790,   0.9294,   1.8091,
          0.4828,  -1.4870,   0.5512,   0.6719,   0.2790,   1.2532,   0.1537,
          0.1711,   0.6322,   0.4744,   0.5008,   0.7425,  -0.4992,   1.3891,
          0.7243,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4852e-01,  1.5134e+01, -3.5647e-01,  6.3224e-02,  3.9549e-01,
         1.2966e-01, -7.9830e-01, -1.9482e-02, -9.7257e-02,  9.8005e-02,
         1.6492e-02,  2.4559e-01,  8.5895e-01, -1.6730e-01,  5.8147e-02,
         8.5502e-02,  3.5256e-01, -1.1359e-01,  2.3523e-01,  1.8447e-01,
         1.9030e-01,  2.0255e-01,  1.1250e-02, -1.9457e-02,  2.1284e-01,
         1.4658e-01, -5.5573e-02, -1.7030e-01,  5.3542e-01,  7.6156e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1235,  9.6840,  0.1981, -1.1001,  1.0716, -0.1826, -0.1330,  0.2450,
        -0.7103, -0.6020,  0.0283,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0795, 24.6771, -0.1594, -0.5885,  0.6538,  0.2174,  0.1810, -0.4328,
        -0.4388, -0.1097, -0.3940,  0.6399,  0.0707,  0.0253, -0.1387,  0.1022,
        -0.1127, -0.3087, -0.0740, -0.2304, -2.4731, -0.3915,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7260e-02, -4.0459e+01, -1.3035e+00, -8.7211e-01, -1.0368e+00,
         5.1125e-01, -5.7391e-03, -1.7628e-01,  1.8248e-01,  7.4128e-01,
         7.7651e-01,  5.1283e-01,  1.0315e-01,  2.9176e-01,  4.3733e-01,
         2.5418e-01,  1.5260e-01,  3.4645e-01, -6.2720e-01,  3.5867e-01,
        -1.0840e-01, -7.6998e-02, -3.4271e-01,  1.5274e-01,  2.7666e-01,
        -6.3652e-02, -1.1006e-01, -6.1511e-02,  1.9730e-01,  4.3152e-01,
        -1.5884e-01,  6.2826e-01, -1.6856e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3093, 55.2025,  4.2573,  0.1480,  0.4283,  1.8399, -0.3360,  0.4044,
        -0.5222,  0.1580, -0.0559,  0.4928,  0.4122, -0.2167, -0.5580,  1.1832,
         0.3102, -0.2185, -0.0795, -0.6825,  0.1250, -1.2571,  0.5101, -0.3317,
         0.1273,  0.6222, -0.6021, -2.3207,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.9390, -24.8941,  -1.2788,   1.0433,   1.0942,   0.2426,  -0.4226,
         -0.5141,  -0.1257,   0.5134,  -0.0365,  -0.0906,   0.0646,  -0.3544,
         -0.1392,  -0.0976,   0.1936,   0.4884,   0.2488,  -0.5008,   0.2403,
         -0.0568,   0.3317,   1.6934,   0.3919,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.2339e-01,  1.6879e+01,  8.4222e-01,  3.0817e-01,  1.8591e-01,
         2.8649e-01, -1.2060e-01, -2.4337e-01,  1.1958e-01,  1.1610e-01,
         1.6506e-01,  2.6723e-01,  4.7527e-02,  1.0055e-01, -1.9315e-01,
        -1.8573e-01, -5.1545e-02, -3.9228e-02, -1.1488e-01,  6.5189e-03,
         9.8445e-02, -2.4724e-01, -3.1294e-02,  5.5952e-02, -3.1144e-01,
        -3.4829e-01, -1.1519e-01, -1.1291e-01, -1.7826e-01, -9.2032e-02,
         3.1456e-02, -1.7283e-01, -1.1138e-01, -1.4592e-01,  7.6551e-02,
         4.0295e-02, -1.2841e-02, -1.8162e-01,  5.6200e-02, -5.5318e-01,
        -1.3683e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7625e-01, -2.6028e+01, -1.7258e+00,  8.8598e-01, -1.5503e+00,
        -1.4155e-01,  2.6170e-01, -2.4003e-02, -1.8127e-02,  1.9315e-01,
         8.9935e-02,  4.5975e-01, -1.1805e-01,  1.2594e-01, -4.7533e-01,
         1.0725e-01,  3.7837e-03, -9.0992e-02, -5.4317e-03,  2.2701e-01,
         7.9220e-02,  1.9469e-01,  3.1760e-01,  1.7623e-02, -1.8577e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.4530, -21.6007,  -1.5479,   1.8489,  -0.0489,   0.2136,   1.2181,
         -0.2119,  -0.2078,   0.0772,   0.1047,   0.6163,   0.4615,  -0.2103,
          0.5448,  -0.1561,  -0.2007,  -0.3710,  -0.2798,   0.7077,   0.6835,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.0201, -34.6988,  -1.4001,  -0.5406,   1.3125,  -0.4786,   0.1456,
         -0.5565,  -0.6241,  -0.4248,   0.5028,  -0.0755,   0.3639,   0.1088,
          0.1908,   0.2612,   0.5895,  -0.3416,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0723e-01,  8.1982e+00,  2.9649e-01,  6.0505e-01,  1.2453e-04,
        -2.8766e-01,  4.9224e-02, -3.1772e-01, -7.0672e-02,  7.8360e-02,
        -1.4079e-01,  1.9725e-02, -4.4345e-02,  6.6712e-02,  1.0085e-01,
         3.0148e-01, -3.2661e-01,  3.4474e-02, -2.9123e-02,  2.0892e-02,
         3.5359e-02, -4.0989e-02, -3.7473e-03, -6.5266e-02, -6.8964e-02,
         2.8719e-02, -9.3558e-02, -3.1054e-02, -6.5421e-03, -2.6527e-02,
         6.2901e-03, -4.9104e-02,  2.2407e-03, -3.3459e-02, -2.4109e-01,
         5.0903e-03, -1.3985e-02,  8.6913e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1101e+00, -2.3321e+01, -9.8308e-01, -1.6902e+00, -3.4166e-01,
        -1.4176e-01,  3.1382e-01,  3.9464e-01, -3.2910e-02, -5.1862e-01,
         4.7681e-01,  2.7121e-01, -1.3290e-01, -9.2832e-01, -5.7886e-02,
        -5.3243e-01, -8.5374e-03,  7.0974e-01, -1.1467e-01, -6.3619e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.7002, -23.6904,  -1.6733,  -0.3328,  -1.5209,  -1.1765,  -0.4313,
         -0.5118,  -0.1946,   0.3766,  -0.8022,   0.5353,   1.0583,   0.5995,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7463e+00,  9.3954e+00, -7.2410e-01, -6.4322e-01,  3.2580e-01,
         1.6440e-01,  5.8924e-01, -2.6871e-02, -2.2127e-02, -1.7679e-02,
         2.3225e-01, -2.1545e-01,  3.6481e-02,  2.7473e-03,  1.5824e-01,
        -1.1236e-01,  1.4560e-01,  4.2191e-01,  1.3102e-01,  1.3366e-01,
         2.3893e-01,  2.9187e-01, -3.2774e-02,  2.9117e-01,  5.4252e-02,
        -1.1479e-01,  5.4903e-03,  1.5456e-02, -9.8998e-02,  2.7745e-01,
         3.4404e-01,  3.8570e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.1518, -33.2491,   0.3627,  -1.8097,  -0.6684,   0.6030,  -1.4275,
          0.7563,   0.1481,  -0.0462,   0.3494,   1.1689,   0.0465,  -0.2771,
          0.3290,   0.1772,   0.1662,  -0.0503,   1.4115,   2.2255,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6196e+00,  1.0053e+01,  5.5979e-01,  3.0289e-02,  1.5590e-01,
        -5.5883e-01,  2.0231e-03, -1.0788e-01, -1.8667e-01,  1.4837e-01,
        -1.3938e-02,  1.0674e-01, -4.1192e-02,  6.0679e-02,  2.3773e-02,
        -4.5268e-03,  1.6713e-01, -2.0800e-02, -3.7766e-02, -5.8464e-03,
         4.3045e-02, -4.3718e-02,  1.3451e-01, -1.4658e-01, -1.6713e-01,
         6.6459e-04, -3.5122e-01, -3.9123e-01, -3.0525e-01,  7.5581e-02,
         3.1880e-01,  7.5308e-02,  3.0527e-01,  5.0496e-02,  6.2436e-02,
         7.0540e-01, -1.4954e-01,  4.4185e-02,  8.6343e-02, -2.1915e-02,
        -1.6202e-01, -2.8196e-03,  2.5078e-01, -6.5915e-02,  2.2944e-01,
         1.0023e+00,  8.2389e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3951, 35.3098,  0.6535,  0.8118,  1.8404,  0.7181,  0.6808, -0.6395,
         5.3231,  0.2640,  0.3207,  1.5990, -0.8190, -0.2409,  0.7640,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1528e+00, -5.3458e+01, -1.3047e+00,  1.5282e-01, -1.6754e-01,
        -3.4842e-01, -1.2589e-01, -1.9838e+00, -5.8929e-01,  3.0314e-01,
         1.5326e-01, -5.3512e-01,  1.4537e-01, -6.6429e-01,  1.1748e+00,
         4.7072e-01,  8.7011e-01, -7.0481e-02, -1.3386e+00,  6.7613e-01,
        -3.3632e-01,  7.3731e-01,  4.0748e-01, -4.9145e-02,  2.6868e-01,
         5.0545e-01, -9.7169e-01, -3.6409e-01, -6.0082e-01, -2.3622e-01,
        -3.1051e-01,  1.9501e-01,  2.2249e-01,  2.1788e-01, -4.1555e-01,
        -3.2493e-01,  2.1590e-01,  7.9923e-02,  3.3279e-01, -7.8804e-03,
        -1.9440e-01,  8.7759e-02,  2.4963e-01,  3.2467e-02, -7.8606e-02,
         1.7482e-01,  4.1650e-01, -1.9383e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-8.0455e-01,  1.6500e+01,  1.1639e+00,  5.9940e-02,  4.8308e-01,
        -9.7542e-02,  1.6258e-01,  6.3764e-02,  1.9671e-02, -8.5746e-03,
         9.7155e-02,  3.5416e-02, -2.3421e-01, -1.0169e-02, -7.3665e-02,
         1.2965e-01, -7.2505e-01,  5.9858e-02, -3.0255e-01, -2.0634e-01,
        -1.6295e-01, -1.2100e-01,  1.5024e-01,  2.2718e-01,  8.8951e-01,
        -8.4368e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6517e-01,  1.4752e+01, -6.8353e-01, -1.1924e-01,  1.1506e-01,
         3.4996e-01, -7.3378e-01,  2.7620e-03, -7.0311e-01, -3.1919e-01,
         6.3793e-01,  2.0756e-01, -5.1305e-02, -1.1365e-01,  3.6748e-01,
         2.8645e-01, -2.8915e-01, -1.2553e-01,  9.8520e-01, -1.2153e+00,
        -2.5650e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9420, 24.6904,  0.8876,  1.0861, -0.0691,  0.5527, -0.0920,  1.0960,
        -0.1846, -0.2371, -0.0905, -0.1985,  0.0920, -0.2350, -0.8732, -0.1985,
         0.0320, -0.4175, -0.2620, -0.4134, -0.4657, -0.1188, -0.2202, -0.1589,
         0.0933,  0.0271, -0.2649,  0.1199,  0.0471, -0.0288, -0.8055, -0.2660,
         0.4796, -0.6403,  0.0334, -1.4874,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4409e+00,  1.4298e+01, -5.5486e-01, -1.1764e-01, -1.3560e-01,
         1.2409e-02, -6.1012e-01,  2.0534e-01,  6.1916e-03,  1.4789e-02,
        -3.6217e-01, -4.6546e-02, -1.9123e-01, -1.9704e-01,  9.2871e-02,
         6.3208e-02, -1.0607e-01, -3.0338e-01,  3.2525e-01, -3.1126e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.7975, -19.2710,   0.1123,  -0.2998,  -0.3447,  -0.6309,  -0.0918,
          0.0745,   0.3829,  -0.2501,  -0.0755,   0.1091,   0.1379,  -0.3668,
         -0.3201,  -0.0939,   0.2959,   0.0699,   0.1558,  -1.4676,  -0.0454,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5691, 26.6312,  0.9337,  1.2098,  0.3420,  0.8098,  0.9670,  0.4047,
         1.0055, -0.5231, -0.0992,  1.8597,  0.5470,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0726e-01,  1.4047e+01,  8.0843e-01,  3.2715e-01, -2.5175e-01,
         8.4543e-02, -8.7769e-01, -2.2952e-01, -2.6268e-01, -2.7978e-01,
        -5.5132e-01,  1.1965e-02, -1.5072e-01,  4.3325e-02,  4.4006e-03,
         1.0523e-01,  8.9718e-01,  5.1987e-01, -2.0271e-02, -1.8255e-02,
         2.1706e-01,  1.8575e-01,  2.0149e-01,  2.7336e-02, -5.8065e-03,
         2.4709e-01, -6.7776e-02,  9.5784e-02,  1.7092e-02, -2.5775e-01,
        -4.4829e-02,  5.8898e-03,  2.9338e-01,  3.9021e-02,  1.1929e-01,
        -1.6636e+00, -7.5554e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5053e-01,  5.0443e+00, -1.2747e-01, -2.3069e-01, -3.1502e-01,
         5.4528e-02, -5.0142e-02,  2.5625e-01, -1.2041e-01, -7.2085e-02,
        -3.5188e-02, -1.4439e-02, -5.2125e-02, -3.2966e-02, -7.1782e-02,
        -3.1448e-02,  5.3825e-02,  1.0792e-03, -6.1254e-02, -1.7867e-01,
        -5.5176e-03, -2.3828e-02,  2.6267e-01,  4.4772e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1699e-01,  9.4682e+00, -4.5615e-02, -1.7045e-01, -1.3555e-01,
         8.1167e-02, -1.7498e-01,  2.1252e-01,  3.5395e-02,  1.9769e-03,
        -3.2570e-01, -4.8617e-04,  1.0135e-01,  2.9370e-02, -7.7934e-02,
        -4.8729e-01, -4.0634e-02, -1.7396e-02, -1.1490e-01,  7.8197e-02,
         2.2738e-03, -1.2362e-01,  1.3620e-01, -4.8180e-02, -8.4521e-02,
        -1.2880e-01,  7.3763e-04,  4.0567e-02,  6.0806e-02, -6.3800e-02,
        -6.9033e-01, -7.4574e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9107e-01,  2.0747e+01,  7.0267e-01, -2.3217e-01,  8.6305e-01,
        -9.9249e-03, -4.0992e-01, -6.3550e-02,  5.2859e-01,  9.4219e-02,
         2.1842e-01,  4.9961e-02, -8.5808e-02,  1.2622e-01,  8.0296e-02,
         8.3842e-02, -7.2075e-02,  8.7839e-02,  4.3973e-01,  1.1576e-01,
         7.3154e-01, -1.6503e+00, -5.2097e-01, -4.7872e-02,  9.8818e-02,
        -9.3267e-02, -3.9590e-02,  1.5010e-01, -3.8450e-01,  1.2748e+00,
         3.3343e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7850e+00, -3.3628e+01,  8.3232e+00, -8.1365e-01, -2.2366e-02,
        -1.8045e+00,  1.2658e+00, -4.4751e-03, -2.9593e-01,  6.2369e-01,
        -2.2968e+00,  1.5072e-02, -1.5399e-02, -4.5794e-01, -9.5396e-02,
         9.9883e-01,  4.6511e-01, -4.5414e-02,  6.6744e-01,  2.9392e-01,
         1.8915e+00, -4.3474e-01, -1.0655e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0621e+00,  1.4700e+01, -4.7651e-01,  4.4353e-01, -5.1016e-01,
        -3.6418e-01, -2.4231e-01, -2.5277e-01, -8.7256e-02,  7.5847e-02,
        -1.1415e-01, -1.1132e-01, -2.0760e-01, -3.0935e-01, -1.4094e-02,
        -9.4104e-02, -6.0844e-01, -1.1843e-01, -1.6511e-01, -9.8132e-02,
         5.2434e-02,  7.8818e-02, -8.2638e-03,  1.4693e-01, -6.4226e-02,
         1.6030e-01, -8.3494e-01, -7.8892e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-4.9202e-01,  2.5681e+01, -8.6352e-03,  1.2193e-01,  4.1492e-01,
         2.3248e-01,  2.1950e-01,  1.0314e-01, -7.3225e-02, -4.0209e-01,
         3.7173e-01,  6.5335e-02, -1.0191e-02, -1.5210e-01, -2.1629e-01,
        -1.1132e-01, -9.1468e-02, -8.5680e-03, -7.2054e-02, -9.4008e-02,
         8.3971e-02, -2.3161e-02, -1.4852e-01, -1.9535e-01,  2.3322e-01,
         1.2620e-01, -8.0627e-02, -1.0096e-01, -1.0719e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4677e+00, -3.0832e+01, -3.7025e-01, -2.3608e-01, -1.9808e+00,
         4.0500e-01, -1.6193e-01,  1.5230e-01,  5.6901e-01,  8.0336e-02,
        -1.5047e-01,  3.8434e-03,  8.2569e-02, -1.1834e-01,  3.8662e-01,
        -5.0114e-02,  1.9156e-03, -1.9978e-01,  3.3266e-01, -7.4853e-03,
        -1.9986e-01, -2.0238e+00,  1.8860e-01, -9.0747e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0136e+00,  6.5404e+00, -5.1661e-01, -3.4239e-01,  8.7818e-02,
        -2.9810e-01, -3.4237e-02, -3.1314e-01, -1.1170e-01, -7.7819e-02,
        -1.4433e-01,  4.9635e-02,  3.3411e-01,  9.7318e-02, -9.2137e-02,
        -3.4711e-02, -2.3350e-01,  1.4248e-01, -2.7715e-01, -6.8446e-03,
         1.3578e-01,  1.0377e-01,  5.6808e-02, -5.9101e-02,  1.3166e-01,
         1.0030e-01,  3.8657e-02,  4.4361e-02, -5.2058e-03,  4.3847e-02,
         1.0714e-01,  5.7485e-01,  3.2322e-01, -8.2001e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.8904, -13.5738,   0.7394,   0.3063,   0.2743,  -0.1786,  -0.1506,
          0.0314,  -0.3000,  -0.3116,  -0.0498,   0.0812,  -0.0714,   0.2410,
          0.0337,  -0.0797,  -0.2058,  -0.0464,   0.2230,  -0.3522,  -0.3760,
         -0.2532,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.2616, -21.3153,  -0.9660,   0.9643,  -0.3883,   0.7953,   0.3836,
         -0.1007,  -0.1478,  -0.1582,  -1.0861,   0.0258,   0.0755,  -0.0323,
         -0.1197,  -0.2524,   2.4303,   0.4065,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1200e-01,  2.9064e+01,  4.4565e-01,  1.1819e+00, -1.6771e-01,
         4.6526e-01,  4.4347e-01, -1.0283e+00,  9.9073e-02,  4.3288e-01,
         1.7177e-01,  2.7481e-01, -3.6886e-01, -4.6791e-01, -4.7229e-01,
        -4.6244e-02, -4.7556e-02, -2.8262e-01,  1.6429e-02, -2.5945e-01,
        -4.9366e-01, -2.0511e-01, -1.5109e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9904e+00, -4.2350e+01, -2.6598e-01, -1.5135e+00,  8.6505e-01,
         4.0635e-01,  7.6209e-01,  9.3335e-03,  9.6873e-01, -5.0924e-01,
        -1.6129e+00, -6.1469e-01, -1.6460e-01, -3.7990e-01, -3.5084e-01,
        -7.7698e-01, -1.8157e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2875e-01,  2.9642e+00, -1.0911e-01,  9.8994e-02,  7.8455e-02,
        -6.9545e-02,  1.3369e-01, -6.1189e-02,  4.5428e-02,  1.0602e-01,
         1.5551e-02, -1.9935e-03,  7.3081e-03,  1.3198e-02, -4.8278e-02,
         1.1252e-01, -8.8180e-02,  1.1326e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7514e-01, -1.5093e+01,  5.7361e-01,  1.0675e-01, -2.5819e-01,
        -4.4622e-01,  9.9560e-02, -5.4114e-01,  5.2670e-01, -8.2812e-01,
        -2.2167e-02,  5.3783e-02,  1.1521e-01,  4.6593e-01,  3.7535e-01,
         4.1760e-01,  3.1735e-01,  4.3370e-01, -6.1598e-01, -3.2512e-01,
        -3.5478e-01, -1.3778e-01, -3.0338e-01,  3.0583e-01,  1.3154e-02,
         9.5024e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2057, 19.2283,  1.7926, -0.6862, -0.5748,  0.3006, -0.1704, -0.1282,
        -0.8235, -0.3286,  0.2290,  0.3627, -0.0910, -0.2199,  0.0541, -0.0973,
        -0.1659,  0.1740,  0.2158,  0.2061,  0.1201, -0.2033,  0.1064,  0.0206,
         0.0518,  0.0384,  0.1056,  0.1300,  0.1300, -0.0481, -0.5049,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.3690, -31.3206,  -0.0525,   0.4477,   0.7089,   0.3796,  -0.5670,
         -0.1928,   0.3916,  -0.9241,   0.5961,   0.5010,   0.1469,   0.2740,
          0.7136,  -0.3227,   0.4017,   0.8880,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2168, 19.3825, -1.2267, -0.9720,  0.5247,  1.4280, -0.8928, -0.8598,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 9.1233e+00,  7.5730e+01, -1.7352e+00, -1.7670e+00, -3.0146e-01,
         6.8475e-01, -5.1511e-01,  9.0687e-02,  3.8825e-01,  2.7530e-01,
         2.0870e-01, -1.6814e+00,  2.7833e-02,  5.8169e-01,  3.2048e-01,
         5.2071e-01, -2.9719e-01, -4.5118e-01, -4.2894e-01, -9.8651e-01,
         2.0530e+00, -6.6543e-01, -5.3414e-01, -4.6031e-01, -3.2128e-01,
        -2.8345e-01, -7.6913e-01,  2.5774e-02, -1.3386e+00,  2.0243e-01,
        -2.1321e-01, -1.3894e-01,  1.7946e-01,  1.2388e-01, -2.9079e-01,
         3.6289e-01,  1.2715e-01, -5.2047e-01, -3.5685e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7011e+00,  1.6177e+01, -1.3454e-01, -1.3574e-01, -1.1940e-01,
        -6.8048e-02,  4.9301e-02, -6.7368e-02, -5.8221e-02,  1.6423e-01,
         3.8814e-02,  1.0483e-02,  2.5546e-01,  2.2090e-02,  1.9700e-01,
         6.8725e-02, -2.0661e-01, -1.8897e-01,  1.8751e-01,  4.4208e-01,
         1.3993e-01, -1.0256e-01, -2.6669e-01,  8.7615e-02,  5.6850e-02,
        -5.5652e-01,  1.8392e-01, -5.0128e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7819e+00, -7.7206e+00,  1.8394e-01,  1.0548e+00, -1.6600e-01,
         1.1975e-01, -2.0319e-01, -5.3999e-01, -5.4979e-02, -2.5379e-01,
        -3.6197e-03, -1.1632e+00, -1.4331e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6958, 40.4348, -1.5415,  2.4922,  0.4764,  1.8015,  0.2459,  1.2885,
         0.7332,  0.1597, -0.1390, -0.8528, -1.0383,  2.8384, -1.8527,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9316e+00,  1.3161e+01,  4.4142e-01,  1.5480e-01,  6.6169e-01,
        -6.2627e-01,  5.3463e-02, -2.4398e-01, -3.4137e-02,  4.2862e-02,
        -4.0927e-01,  9.1001e-02,  6.1409e-02,  2.6732e-01,  9.0051e-02,
         1.3825e-01, -1.1594e-01,  1.1836e-01, -1.3536e-01, -1.7193e-01,
        -6.4270e-02, -1.0808e-01, -8.0961e-03,  2.0479e-01, -8.5400e-02,
        -7.5511e-02, -6.8401e-02, -7.4699e-02, -2.7473e-02, -8.5071e-02,
        -2.0273e-01, -1.3461e-01, -2.4140e-01, -2.5404e-01,  1.9860e-01,
        -1.1984e-01, -4.4927e-02, -9.2779e-02, -3.0635e-02, -8.3408e-02,
         4.8949e-02, -8.9566e-02, -4.3991e-02, -1.9820e-02, -1.2445e-01,
        -8.3635e-03,  1.3956e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4953e+00,  2.4772e+01,  1.6210e+00, -1.5293e-01,  4.7774e-01,
        -1.4006e-01,  1.3243e-01, -2.4669e-01, -3.2563e-01, -2.0568e-01,
         8.0436e-02,  1.1137e-01,  1.9128e-01,  1.9765e-02,  4.9206e-03,
        -8.4538e-02,  1.1981e-01, -3.5681e-01,  3.7410e-01, -9.1487e-03,
         1.9215e-01,  7.2306e-02,  1.8930e-01,  6.7523e-02,  5.9300e-01,
         2.9561e-01,  4.5134e-01,  5.3012e-02, -1.1404e-01,  5.4690e-03,
         6.1747e-02,  1.8954e-01,  3.4819e-02,  2.4761e-01,  1.0749e-01,
         1.5618e-01, -1.1169e-01,  2.8483e-01,  2.6575e-02, -1.7832e-01,
         1.0250e+00,  1.2182e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.3390, -45.5704,   0.5349,   0.5577,  -0.6789,  -1.9308,  -1.2485,
          1.7857,  -0.1719,  -0.3381,  -0.5512,   0.7716,   1.4478,  -0.5639,
         -1.2521,  -2.3768,  -0.9388,   0.1851,   0.5115,  -0.7700,   1.0460,
          0.6625,  -0.2097,   2.5738,   2.3110,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3448e-01,  4.0113e+00,  1.6497e-02, -5.7254e-02, -4.0670e-02,
        -8.0849e-02,  1.1268e-01, -8.4696e-04, -2.3022e-02, -1.9559e-02,
         1.6045e-02,  3.3472e-02, -7.4103e-02,  1.7362e-03, -1.3213e-01,
        -2.5217e-02, -5.6842e-03,  5.7262e-02,  1.8699e-02, -2.9026e-02,
        -4.8575e-03, -4.0799e-02, -2.4005e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1563e+00,  1.7185e+01,  1.0990e+00, -3.8342e-01, -1.2096e+00,
        -9.2294e-02, -4.0647e-02, -1.4473e-01, -9.6654e-01,  8.8569e-02,
        -3.2631e-01,  1.6178e-02, -4.9185e-01, -2.3003e-01, -3.6396e-01,
        -9.0518e-02, -3.9119e-01,  1.6313e-01,  1.4031e-02, -2.8330e-01,
         6.1669e-02, -5.2162e-01, -3.9775e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0255, 22.4095,  0.1668, -0.9337,  1.0852,  0.6262,  1.3416,  1.9254,
         0.2844,  0.6714, -0.7752,  0.7158,  0.3316,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5564e-01,  2.2104e+01,  8.3540e-02, -1.1152e-01, -4.7460e-01,
        -4.6496e-01, -2.2033e-02,  8.3166e-01,  2.3270e-01,  5.0680e-01,
         1.2699e-01, -6.9752e-02,  4.0885e-01,  6.8751e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0081e-01,  1.3010e+01,  8.6343e-01, -6.7569e-01, -2.5198e-01,
         2.0797e-02,  6.7733e-03, -1.0429e-01, -6.9415e-02, -7.3466e-02,
         5.2395e-02, -1.6389e-02,  8.1641e-02,  1.3271e-01,  6.2978e-02,
        -9.1650e-02, -2.8041e-01,  1.2333e-01,  4.6323e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.2184e+00,  1.0053e+01,  3.1043e-01,  9.5311e-03, -1.6465e-01,
        -3.5155e-02, -5.4107e-02,  7.2034e-02,  9.9284e-02,  3.4561e-02,
        -2.7327e-02,  9.9000e-02, -2.5191e-02,  1.6522e-01,  4.5980e-02,
        -8.2115e-01,  1.4752e-01, -5.1667e-02, -5.6007e-02,  3.5869e-01,
         6.6283e-02, -1.9383e-02, -7.4811e-02,  1.5326e-02,  1.7030e-01,
        -3.7503e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7839, 56.5774,  4.2323, -2.2441,  1.1919,  2.0884,  0.3542, -0.4844,
        -0.9944, -0.2989,  0.5203,  2.0496,  0.6944,  0.1236, -2.9355,  2.7490,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3465,  8.5631, -0.6985,  0.5992,  0.1328,  0.3834, -0.1277,  0.1442,
         0.1004, -0.0402, -0.0193,  0.0119,  0.0587, -0.0288, -0.2333, -0.4562,
         0.1095,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2582, 27.8840, -0.4767, -0.4730,  3.1227, -0.8216,  0.7431,  2.4702,
         0.8795,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2888e+00,  1.7121e+01,  1.0331e-01,  2.0663e-01, -9.5645e-01,
        -1.7237e-01,  3.2863e-01,  5.6672e-01,  2.6057e-01, -4.0109e-01,
         4.6057e-01, -6.4416e-01, -7.2040e-02,  6.9753e-02, -2.5290e-01,
        -5.9144e-02,  2.9410e-02,  1.7464e-01, -1.7404e-01,  8.1903e-02,
         1.7504e-01,  1.1577e-01,  2.0717e-01,  5.1795e-02,  1.9635e-02,
        -5.9627e-02, -2.2093e-01,  8.1049e-03,  1.2082e-01, -1.1217e-01,
        -1.4868e-02,  9.1896e-02, -3.6763e-03, -8.6337e-02, -2.2341e-02,
        -4.1658e-01, -2.6945e-02,  4.9742e-01,  9.6565e-02,  5.0249e-02,
        -2.1818e-01, -8.4141e-02,  1.8185e-01,  8.7058e-01, -2.8379e-02,
         2.7155e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6784, 19.9255, -1.4771, -0.5928,  1.0098,  0.4215,  0.1740, -0.1621,
        -0.4971, -0.0475,  0.1138, -0.2139,  0.1317,  0.1731,  0.0393, -0.0418,
        -0.2410,  0.1925, -0.2357,  0.6429, -0.3117,  0.1995,  0.0772, -0.3040,
        -0.4847,  0.0654, -0.1077, -0.0458, -1.0221,  1.0662,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5956, -13.5779,   0.5589,   0.9943,  -0.1842,   0.7709,  -1.6024,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4726e+00,  1.8903e+01, -4.9335e-01, -4.5543e-01, -4.0174e-01,
        -4.2959e-01, -4.5717e-01, -1.8284e-02,  1.7606e-01, -1.5343e+00,
        -2.4562e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7507, 19.1026,  0.6540,  1.5247,  0.1172, -0.2868,  0.4507, -0.4253,
         0.1569,  0.3411, -0.3218,  0.1349,  0.4032,  0.4149,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4303e-01, -2.8851e+01, -2.1487e+00,  1.0953e+00, -9.8710e-02,
        -1.3296e-03, -1.0606e-01, -4.5612e-01,  1.1298e-01,  6.6039e-01,
         2.8638e-01,  3.0170e-01,  5.0564e-01,  2.3552e-01, -6.5926e-03,
         2.2207e-01,  3.8104e-02,  9.0622e-03,  2.2623e-01, -5.1115e-01,
         1.0544e-02, -8.1495e-02, -2.8530e-01,  1.6431e-01,  4.5823e-01,
         2.9452e-01,  5.7879e-01, -3.2824e-02,  7.0240e-02,  3.0992e-01,
         8.4730e-01, -1.9730e-01,  6.7065e-01,  7.4586e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8853, 61.6568,  7.5735, -1.3233,  2.4345,  1.8347, -0.4540, -0.8616,
        -0.6044, -0.6023, -0.7539, -1.2054, -0.2775,  1.1345,  3.3638, -0.2735,
        -0.1073, -1.1758,  0.1583, -0.1125,  0.6229,  0.4099, -0.1881, -0.1508,
        -0.3839, -0.2179,  0.3408,  4.3771,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3730e+00, -6.0534e+01, -1.1874e+00, -3.3485e+00, -2.8605e-01,
         7.3607e-01, -8.1841e-02, -2.4701e+00,  2.0299e-01, -1.1184e-01,
         1.8343e-01, -2.6820e-01, -2.9034e+00,  2.7163e+00,  1.0616e+00,
        -6.0059e-01, -2.0112e+00,  4.0579e-02,  2.4225e-01, -5.2621e-01,
        -5.0770e-01, -1.8902e-01, -3.4800e+00,  4.3734e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 8.2383e-02,  8.1255e+00, -1.4405e-01,  1.9297e-01, -1.2055e-01,
        -9.9657e-02,  1.5415e-01, -7.8565e-02, -1.4419e-01,  4.6002e-02,
        -1.3735e-01, -1.0155e-01,  4.3145e-02, -1.9684e-02,  4.7484e-02,
        -1.2066e-01, -5.1118e-05, -6.9430e-02, -2.2229e-02, -1.5932e-01,
         2.4844e-02, -7.1644e-02, -9.2494e-02, -5.6579e-02,  8.4487e-02,
         2.7423e-01, -8.6299e-02,  3.7848e-02, -4.6771e-02, -1.3879e-01,
         1.0993e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.3959, -11.3882,   1.7089,   0.4698,   0.4672,   0.4771,   0.1329,
         -0.2408,  -0.5799,   0.1507,   0.0838,  -0.1540,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2530, 15.9949,  0.3589, -0.4923,  0.1987,  0.1544,  0.1075,  0.1693,
        -0.0876,  0.0647, -0.3188, -0.1390,  0.6688,  0.0341,  0.3428,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3429e-01, -1.4096e+01,  2.9779e-01,  7.3291e-01, -3.5781e-01,
        -1.5444e-01, -7.3498e-02, -2.6255e-01, -1.9673e-01,  1.5221e-02,
        -6.7611e-02, -2.1780e-01, -6.6782e-02,  1.5484e-01, -3.4587e-02,
        -1.6379e-01, -4.0618e-01, -2.2167e-01, -3.3255e-02,  4.8525e-01,
        -1.2591e-01, -7.2470e-02, -2.5824e-01, -4.1536e-01, -3.1669e-01,
        -5.2341e-02, -1.0796e-01, -6.6326e-02, -1.4199e-01, -2.5623e-01,
        -1.4259e-03, -3.3796e-01, -4.8322e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.0402, -21.5953,  -0.8384,   2.6859,  -0.5918,   0.4592,   0.3481,
          0.4579,   0.0754,  -0.0707,   0.2161,   0.1473,   0.5946,   2.2083,
         -2.1783,   1.2218,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1803, 26.8380,  1.1633, -1.1396,  2.3404,  1.4664,  2.1207,  1.3349,
        -1.4050,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.4807, -33.4043,   0.5898,  -4.4773,   4.0581,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1924e-01,  5.0409e+01,  9.8795e-01, -3.7876e-01, -1.2265e+00,
        -8.5213e-01, -8.3789e-01, -6.6574e-01, -2.5327e-01, -3.6964e-01,
         1.3387e+00,  4.8561e-01, -2.5785e-02, -8.3213e-01,  3.3058e-01,
        -2.7970e-01,  2.6766e-01, -8.6998e-02,  5.1334e-01, -2.4328e-01,
        -1.7467e-01, -2.9483e-01, -1.3252e-01,  3.4073e-01,  2.4937e-01,
        -6.7936e-01, -3.5859e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.2025, -22.6083,   1.8766,   0.3831,  -1.7720,  -1.4700,  -0.0369,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8671, 23.9682,  0.6134, -0.6757, -0.8294,  0.0663,  0.1577, -0.2713,
        -0.4352, -0.3021, -0.2816, -0.5352, -0.1849, -0.1498, -0.1114, -0.0948,
        -1.6281,  2.4638,  0.0286, -5.8012, -0.8360, -0.9424,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7868e-01,  3.0171e+01,  1.4733e+00,  7.8263e-01, -3.4031e-01,
        -6.4940e-01, -7.6125e-01,  7.0377e-01, -7.2194e-02,  5.7086e-01,
         2.6753e-01,  4.2201e-01, -1.2696e-02,  8.3773e-01,  2.8269e-02,
        -3.9203e-01, -6.8438e-01, -5.3366e-01, -4.7486e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2135e+00,  1.4485e+01,  1.0532e+00, -1.5824e-01, -1.6465e-01,
        -5.7562e-01, -3.9347e-01,  1.0636e-01,  4.2127e-02,  2.0993e-01,
        -3.4427e-02, -3.6255e-02,  2.5722e-02, -3.4918e-02, -4.3833e-02,
         1.0156e-01,  3.2325e-02,  7.9618e-03, -3.8651e-02, -6.9704e-02,
         1.4249e-01, -2.4373e-02, -4.1805e-02,  1.1373e-01,  1.1926e-01,
        -1.6738e-01, -2.8667e-01, -2.5025e-01, -1.4752e-01,  2.9767e-01,
        -4.6904e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-4.9679e+00,  3.4948e+01,  7.0568e+00, -8.7760e-01,  3.6246e-01,
        -8.9775e-01, -1.0286e+00,  1.9784e-01, -3.5954e-01,  3.4538e-01,
         2.9823e-01,  5.4539e-01,  1.4043e+00,  8.5380e-01,  5.9750e-01,
         8.0113e-01,  9.2078e-01,  5.2483e-01, -2.1038e+00,  2.9789e-01,
         2.1762e-01, -2.6117e-01, -3.7680e-01,  5.7590e-01, -7.1267e-02,
         1.6038e-01,  9.8818e-03, -2.1204e+00, -1.0300e-02, -2.6807e-01,
         1.0081e+00, -3.0701e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4334e+00, -2.2087e+01, -9.0231e-03, -1.2235e-01,  4.8298e-03,
         1.9449e-02,  8.7089e-02, -3.8123e-01, -2.9956e-01, -1.3037e-01,
         3.0698e-01, -7.2247e-02,  5.0105e-01, -6.5022e-02,  3.6261e-01,
        -2.0676e-01, -4.4802e-01, -1.2834e-01, -1.9310e-01, -5.1690e-01,
        -6.2391e-01, -9.0114e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6016e-01,  6.0816e+00,  4.3296e-01,  1.1850e-01,  9.0403e-02,
         4.2757e-02, -3.5046e-01,  4.6761e-02,  1.1727e-01,  1.5678e-02,
        -5.0599e-03, -9.7290e-02, -7.3350e-02, -3.3220e-02,  7.0979e-02,
        -1.1116e-02, -3.1956e-02,  1.5718e-03, -3.9412e-02, -1.1197e-01,
         3.0632e-02, -1.0279e-01,  5.8686e-02, -4.5383e-02,  1.9638e-02,
        -6.6492e-02, -1.4230e-02, -1.8513e-01, -5.1291e-02, -1.5506e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4003e+00,  2.0397e+01, -1.2743e+00, -6.9358e-01, -2.3567e-01,
         1.5539e-02,  7.0956e-01,  1.5192e-01, -1.9873e-01, -6.9080e-01,
        -9.5290e-02,  3.5303e-01, -1.0238e-01,  6.4460e-02,  1.3204e-01,
        -8.2036e-01,  4.8550e-01,  8.1681e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2352e-01, -2.3643e+01, -2.0930e+00,  5.6334e-02,  5.1129e-01,
        -1.9514e-02,  1.0653e-01, -3.5665e-01,  8.0527e-01, -1.1487e-01,
        -1.3923e-01, -6.8347e-02,  9.8726e-02, -2.0881e-01,  3.7651e-01,
         2.5632e-01, -8.2505e-02, -1.7263e-01,  4.0283e-01,  2.6552e-01,
         1.2524e-02,  9.8079e-02,  4.1618e-01, -6.9382e-02,  2.6335e-01,
         2.0526e-01, -4.9646e-02, -1.3058e-01,  3.5283e-02,  3.6851e-02,
         1.4828e-01, -8.2995e-02,  2.9533e-03, -3.2369e-01,  9.2024e-02,
        -7.4606e-01,  2.5541e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5590e-01, -1.0743e+01,  2.1037e+00,  4.4520e-01,  9.6318e-02,
         6.1261e-02,  1.0348e-01,  2.4011e-02, -7.0438e-03, -3.4121e-02,
        -1.6801e-01, -2.2337e-01, -1.2100e-01, -1.8002e-02,  2.0326e-01,
        -1.2656e-01, -5.6271e-02, -8.5523e-02, -9.6216e-02, -1.3300e-01,
        -2.5396e-01,  3.4806e-01,  5.9951e-02,  1.7765e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5673e-01,  1.4803e+01,  2.9782e-01,  2.3920e-01,  1.0445e+00,
         8.2864e-02, -2.0192e-01, -5.8097e-02, -3.0123e-01,  8.6980e-02,
        -2.3614e-01,  8.2208e-02, -4.9857e-01, -3.2251e-02, -1.4828e-02,
         4.0793e-02, -3.6456e-02, -3.4326e-02, -7.6020e-02, -1.5100e-02,
         4.7802e-02,  2.1277e-02,  3.2574e-02,  3.8448e-02,  2.2361e-01,
         7.9453e-02,  1.1203e-01,  1.4310e-01,  3.8026e-01,  4.5661e-03,
         9.4329e-02,  4.3274e-01,  1.5729e-01,  1.1571e-01,  1.2365e-01,
         3.8515e-02, -1.0111e-01, -7.9565e-02,  1.1220e-01, -8.9737e-02,
        -5.2266e-02, -1.1278e-01, -1.7418e-01,  2.7168e-01, -5.9466e-02,
         9.7693e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6734e+00,  2.1481e+01,  7.0044e-01,  1.3362e+00, -1.0926e-02,
         3.5341e-01, -3.1262e-01, -5.3099e-01,  4.5601e-01, -8.4543e-01,
         2.7634e-01,  3.7667e-01,  3.6379e-02, -4.4530e-01,  2.8963e-02,
        -3.8003e-01, -3.9512e-01, -6.7695e-01,  4.1829e-02,  3.3302e-01,
         6.9920e-02, -5.3718e-01, -1.0421e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0837e+00,  3.6433e+01,  2.3724e-01,  8.9683e-01,  6.3865e-01,
         8.5730e-01,  8.0057e-01, -6.8772e-01, -3.2855e-01,  2.0776e+00,
         5.7755e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4391,  8.0047,  0.5455, -0.3421,  0.0425, -0.0517,  0.2993, -0.2361,
         0.2703, -0.1155,  0.2950, -0.0526,  0.5280, -0.1098,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3440,  7.6253,  0.0746, -0.5231,  0.1209,  0.2601, -0.0714, -0.0794,
         0.1935, -0.1193,  1.1076, -0.2143,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5238e+00,  4.5214e+01,  5.1448e+00,  5.4787e-01,  9.7089e-01,
        -3.5569e-01,  2.2599e-03,  3.2852e-01, -2.4240e-01,  9.6073e-02,
        -7.2446e-01,  4.4515e-02,  4.8285e-01,  3.4968e-01, -2.6401e-01,
         1.7779e-02,  1.6426e-01, -6.8350e-01,  3.8015e-01, -7.2428e-01,
         8.2367e-02, -2.7470e-01, -1.0828e-01,  1.0886e-01,  1.5043e-01,
        -4.7929e-01,  1.4659e+00, -3.4020e-01,  1.1191e+00, -2.4913e-01,
         5.9322e-01,  2.9321e-01, -1.7517e-01,  2.2587e-01,  1.1550e-01,
        -1.6210e-01,  1.8797e-01,  7.3037e-02, -2.0642e-01, -1.4123e-01,
         9.1376e-02,  4.2904e-02,  1.5096e-01, -9.6112e-02,  6.5065e-02,
         1.4601e-01,  2.2078e-01, -3.3320e-02, -9.0744e-02, -1.9881e-01,
         6.6522e-02,  1.9036e-01, -1.2269e-01,  8.7391e-01,  4.9506e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.3028e+00, -2.8877e+01,  1.9293e+00,  1.3304e-01, -2.0969e+00,
        -1.5209e+00,  2.4879e-01,  5.5144e-01,  1.7307e+00, -4.2369e-01,
         5.6185e-03,  3.2061e-01,  2.4406e-01,  5.2705e-01, -4.0525e-01,
         1.4123e-01,  5.4391e-01,  1.5479e-01, -4.2851e-02, -2.7944e-01,
        -2.0405e-02, -2.1183e+00,  1.1855e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6995, -6.6714, -0.4720, -0.3958, -0.1557, -0.1516, -0.0944, -0.3932,
         0.0411, -0.3252,  0.0348,  0.0391,  0.1054,  0.1472, -0.0258, -0.1334,
         0.4922,  0.0360,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3240, -4.8132,  6.4637,  0.0493,  0.7746,  0.6204, -0.0770,  0.6786,
        -0.1432,  0.0455, -0.1210,  0.4375,  0.8590,  0.1647,  0.2095,  0.7151,
         0.0315,  0.3244,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9162e+00,  3.0888e+01,  1.7284e-01, -8.5228e-01,  3.6344e-01,
         1.3345e+00,  2.2518e-01, -6.4753e-01, -1.8810e+00, -4.6461e-01,
        -2.9819e-01,  5.8405e-01,  2.4043e-02, -2.6779e-01, -2.1829e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8457e-01,  1.9142e+01,  1.3578e+00,  1.1744e+00, -2.6027e-02,
        -5.1677e-01, -6.8791e-01,  1.2512e-01,  3.9231e-01,  1.1330e-01,
        -2.8038e-01, -1.1102e-01, -1.3902e-02, -3.0327e-01, -3.2363e-01,
         2.8865e-01,  1.6383e-01,  1.0952e-01,  2.1560e-01, -3.6333e-01,
         1.5515e-01, -7.2737e-01,  5.6082e-02, -9.8636e-02,  2.4485e-01,
         2.7159e-01,  3.1689e-01,  1.2297e-01,  4.8621e-02,  6.8911e-01,
         6.7400e-01, -8.7803e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1281e+00,  5.1973e+01, -1.3021e-01, -5.8394e-01, -2.9011e-01,
         4.7728e-01,  1.6756e-01, -1.4499e-01,  1.0563e-01, -4.6340e-01,
         7.2936e-01,  2.8230e-01,  3.7468e-03, -6.4197e-01, -4.5031e-01,
        -9.4026e-01, -1.5191e-02,  4.1481e-01,  6.7534e-02,  2.6814e+00,
        -9.7674e-01, -7.1837e-01, -5.6035e-01, -4.4020e-01, -3.3586e-01,
         7.2273e-01, -2.6631e-01, -1.7958e-01, -8.1827e-01, -1.5322e-01,
         1.2740e-01,  1.1159e-01, -5.5969e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0758e+00,  4.2286e+01,  2.1788e+00,  2.0690e+00,  1.1348e+00,
         1.1380e+00, -5.6225e-01,  1.0725e-01,  2.4466e-01, -9.5816e-02,
        -3.3660e-03, -1.7564e-01, -1.6998e+00, -1.3368e+00,  1.2670e+00,
        -1.0316e+00,  6.7654e-01, -1.4233e-01,  3.2957e-01, -4.5780e-01,
        -3.7110e-01,  2.3945e-01,  1.4876e-01,  6.4502e-01, -6.5067e-02,
        -5.3609e-01,  2.0713e-01,  2.0224e-02,  8.9995e-02,  1.6272e-01,
         1.8591e-01, -3.0275e-01,  9.2322e-02,  2.4515e-01,  5.8581e-03,
        -2.6471e-01, -3.6086e-01,  2.5406e-02,  2.1919e-02, -4.5649e-02,
        -2.4967e-01, -1.0458e-01, -1.2854e-01,  1.4888e-01,  4.0046e-01,
         4.5898e-01,  6.6609e-02, -1.1214e-01,  2.5581e-01,  4.6474e-01,
        -9.2290e-02,  2.5277e-01,  3.1710e-01,  6.2122e-02,  6.4919e-01,
         1.2149e+00,  8.8405e-02, -1.4619e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8543e+00,  2.0191e+01, -9.6804e-01, -2.1868e-01,  6.2300e-01,
         2.2649e-01,  1.9972e-01,  1.0610e-01, -2.7881e-01,  3.3291e-01,
        -6.8540e-01,  2.7056e-01, -5.8942e-01, -1.2147e-01,  1.4989e-01,
         1.2976e-01,  6.5988e-02, -1.4659e-01,  1.5126e-02, -9.1134e-02,
         1.7127e-01, -1.0873e-01, -2.3343e-01, -1.5141e-01, -3.7646e-02,
        -2.7059e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5967, 15.6850,  0.3110, -0.6205,  0.1349,  0.0753, -2.3131, -0.0679,
        -0.9079,  0.0366, -0.0732,  0.4309,  0.4671,  0.1514, -0.1783, -0.0420,
        -0.1955,  0.1161,  0.1520,  0.7571, -0.0909, -0.1679,  0.0339, -0.1924,
        -0.7223,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6478e-01,  1.1573e+01,  4.8076e-01, -4.0134e-01,  2.5213e-01,
        -1.5111e-01,  7.8220e-02, -6.1438e-01, -4.2731e-01, -1.4335e-01,
        -4.0739e-02, -6.0030e-01,  1.4386e-01,  2.4792e-01,  2.2849e-02,
        -6.1176e-02, -5.7745e-02,  7.4435e-02,  6.8748e-04, -1.4404e-01,
        -1.0814e-01,  2.2369e-01, -1.2017e-01, -1.5614e-02, -1.9462e-01,
         4.8220e-02,  2.8789e-02,  3.0859e-02, -6.0137e-03, -2.5215e-02,
         9.1178e-02, -1.1638e-01, -5.3200e-02,  5.2479e-03,  5.1758e-02,
         1.3363e-02, -1.6682e-02, -1.2487e-01, -1.3735e-02,  4.5258e-02,
        -1.0988e-01, -1.8238e-03,  6.5472e-02, -1.7944e-02, -1.1127e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8229, 16.3128, -1.8041,  0.8193, -0.4731,  0.5897,  0.0683,  0.1719,
        -0.1762, -0.1558,  0.3732, -0.0758, -0.4895,  0.1788,  0.0526,  0.2754,
        -0.2485,  0.0278,  0.0671,  0.1400,  0.1232, -0.2143, -0.2888,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.5954, -46.1146,   3.3443,   1.4697,   0.1668,  -2.5674,  -1.5386,
          0.2552,  -0.9754,  -0.3993,  -0.6043,   2.3466,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0735, 20.6340,  2.6365, -0.7363, -0.1473,  0.4191,  0.0362,  0.3485,
         0.0455, -0.6174,  0.0223, -0.3662, -0.3481,  0.0930, -0.3702, -0.1765,
        -0.2286,  0.0449, -0.1013, -0.6351, -0.4332, -0.0804, -0.3266,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1725e-01,  3.2836e+01,  1.0578e+00, -7.1488e-01,  3.7605e-02,
        -3.8366e-01, -2.6288e-02,  6.4200e-01, -3.3819e-01,  1.6475e-01,
        -2.3474e-01, -4.7060e-01, -3.8465e-01, -3.6526e-02, -4.8301e-01,
        -2.2902e-01,  1.1715e-01, -5.7918e-01, -1.7386e-01, -7.9297e-01,
         4.7271e-01, -1.5826e-01,  3.0487e-02, -1.8632e-01, -2.0602e-01,
        -3.4796e-01, -1.6445e-01, -1.8226e-01, -4.2758e-01,  2.5025e-01,
        -2.8372e-01, -7.3987e-01, -2.9136e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  6.8664, -45.0431,  -2.4143,  -0.4015,   0.2407,  -0.7760,  -0.7511,
         -0.5468,  -0.5676,   0.8093,  -0.7948,  -0.1489,  -0.1628,  -0.1304,
         -0.8359,  -0.1667,   0.5937,  -2.6338,   1.3590,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.9561, -38.7250,   0.7992,   0.2256,  -0.4533,  -1.2625,  -0.5184,
          0.4657,   0.3548,   1.1084,  -0.6035,   0.4137,  -0.2345,  -1.9377,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4195e-02,  2.9850e+01,  3.1365e-01, -5.7363e-02, -8.7482e-01,
         5.7188e-01, -6.0203e-02,  3.8542e-01,  7.4885e-02,  9.6904e-02,
         8.8982e-03, -1.0318e-02,  1.0694e-01, -6.0593e-01, -4.0944e-01,
        -2.0088e-01, -2.0041e-01, -1.2699e-01, -1.4395e-01, -7.9609e-02,
         5.1243e-02, -5.9673e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4824e-01, -2.9009e+01,  3.1524e-01,  2.1211e-01, -2.2345e-01,
        -3.8608e-01,  3.1709e-01,  5.8256e-01,  1.3600e-01, -6.4474e-01,
        -1.8629e-02,  8.7070e-01,  9.7664e-01,  1.4306e-01, -1.7039e-02,
        -8.0060e-02, -1.2963e-01,  4.0750e-01, -1.6790e-01, -1.2502e-01,
         4.4668e-02, -1.6444e-01,  9.5959e-02,  8.5134e-02,  1.4482e-01,
         5.6552e-02, -1.1201e-01, -2.5725e-01,  6.5961e-01,  1.3935e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9951, 12.1921, -0.7094, -0.2506,  0.6632,  0.4507, -0.0400,  0.2435,
         0.3362, -0.0871, -0.8973, -0.3066, -0.5485,  0.1670, -0.1418, -0.9854,
        -0.5627,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1350e-01, -3.4047e+01, -1.6415e+00,  1.3228e+00,  1.5529e+00,
         6.1820e-01,  7.8877e-01, -1.3675e-01, -2.4255e-02,  3.8292e+00,
         6.3044e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7155e-01, -1.5792e+01, -1.9586e+00,  3.1044e-01,  2.0853e-01,
         3.6261e-01,  2.1827e-01, -2.3838e-01,  2.0630e-01,  1.1290e-01,
         3.3224e-01,  3.6429e-01,  1.0776e-01, -1.2524e+00, -2.6015e-01,
         5.7702e-01,  1.5654e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0219e+00, -3.3794e+01,  7.7606e-01, -1.1764e+00, -7.3602e-01,
         5.1261e-01,  4.1916e-02, -3.7506e-01,  3.8161e-01,  2.1169e-01,
         2.5731e-01, -6.4429e-01,  1.9455e-01, -7.2685e-01, -1.0797e+00,
         1.8808e-01,  2.0370e-01,  4.8830e-01, -2.1503e-02,  4.0123e-01,
         5.9988e-02,  1.7913e-01,  7.8023e-02, -2.1754e-01, -3.2935e-01,
         2.7585e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4256, 11.5204,  0.7945, -0.5377, -0.5960,  0.4751, -0.5453,  0.1392,
        -0.1478, -0.1115, -0.1620, -0.1771, -0.0605,  0.0232, -0.1669,  0.5921,
        -0.1961,  0.0236, -0.0530,  0.1346, -0.0625, -0.1171, -0.0948, -0.1880,
         0.1192, -0.0840, -0.0121,  0.1048, -0.2306, -0.0827,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.2872, -45.5254,  -0.3454,   1.2438,  -1.5967,   0.7669,  -0.8230,
          0.7658,  -0.2352,  -1.8858,   0.6122,   0.1343,   0.3996,   0.6484,
          0.8200,  -0.8326,   0.3323,   1.2983,  -0.1698,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-5.2625e-02,  9.0527e+00,  3.4168e-01,  8.3329e-01,  1.0161e-01,
         6.5052e-03,  1.4008e-01, -3.3855e-01, -4.3702e-02, -6.7248e-03,
        -3.0947e-02, -1.0038e-01, -8.9677e-03,  3.2791e-02, -9.8422e-02,
        -1.1197e-01,  1.3098e-01,  1.4494e-01,  2.6879e-02,  1.2738e-01,
         1.5814e-02,  2.8543e-02,  5.4793e-02,  1.4516e-01, -1.3218e-01,
        -1.3577e-01,  5.9054e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1062e+00,  2.5688e+01, -7.6745e-01,  4.1007e-02,  5.9324e-01,
         4.3127e-01, -8.4333e-01, -5.1082e-01, -3.4670e-01,  2.3413e-01,
        -1.9531e-01,  1.8073e-03, -1.7912e-01, -6.7701e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3892e+00, -4.0242e+01, -1.0163e+00,  7.2497e-01, -1.1900e+00,
         3.7394e-01,  2.8082e-01,  2.6569e+00, -4.9399e-01, -1.0328e+00,
        -3.5888e-01,  1.6106e-01,  5.0841e-02, -4.2466e-01, -3.3721e-01,
        -7.7105e-01, -1.0456e+00,  5.2365e-01,  3.3256e-01, -1.1178e-02,
         3.8510e-01,  3.9325e-01, -9.8633e-02,  7.3875e-01, -4.9411e+00,
        -2.5318e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0420e-02,  3.0432e+01,  1.1106e+00,  6.8450e-01,  9.6757e-03,
        -3.2993e-01,  2.6429e-01, -8.2052e-01, -1.1001e+00, -1.6606e-01,
        -1.4804e-01, -1.2342e+00, -3.9518e-01,  3.4335e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5598e-01,  2.3791e+01,  1.4184e+00,  6.3169e-01,  1.8168e-02,
         8.7432e-01,  2.0226e-01, -7.1575e-02,  2.3364e-01, -6.4747e-02,
         7.6144e-02, -3.4120e-02,  1.4605e-01,  6.9235e-01,  3.2565e-01,
        -1.1224e-01,  2.2118e-01, -1.7812e-02, -8.7125e-02, -4.3410e-02,
         2.1698e-01, -2.7318e-01,  1.3998e-01,  2.1871e-01, -1.0097e-01,
        -1.3872e-01, -2.3666e-01,  4.2011e-02,  5.4368e-01, -5.5318e-02,
         5.8953e-01, -2.6211e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2714e-01,  2.6587e+00,  8.0388e-02, -2.0692e-02,  2.9796e-02,
         1.3009e-02,  3.8455e-02, -2.3307e-02,  2.4308e-03, -2.0613e-02,
        -5.6029e-04, -1.0310e-02, -8.8072e-03,  8.3672e-03,  7.1746e-03,
        -1.5243e-02,  9.5960e-03, -1.3085e-03, -4.7843e-02, -2.5727e-02,
        -2.5213e-02,  3.9055e-03, -3.1992e-02,  6.1449e-02, -1.6547e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0573,  4.1271,  0.0497,  0.0324,  0.1127, -0.2702,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9735e-01,  1.8535e+01,  7.9629e-01, -1.1011e+00,  6.3812e-01,
        -4.1201e-01, -1.3405e-01, -2.7703e-02, -5.8282e-02, -3.1449e-01,
        -3.3252e-01, -1.3768e-01, -1.7705e-01,  3.4021e-02,  1.4098e-02,
        -2.9775e-02,  9.4327e-02,  4.0243e-02, -2.5784e-01, -1.2788e-01,
        -9.5254e-02, -1.0694e-01, -2.7254e-01, -1.2297e-01, -1.9849e-01,
        -2.4812e-01, -1.2171e-01, -4.8253e-02, -6.9129e-02,  8.6732e-02,
        -2.4026e-02, -1.3235e-01, -1.1391e-01, -7.6076e-02, -1.4813e-01,
         1.5780e-02, -2.4808e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7826e+00, -1.3859e+01,  1.2258e-01, -5.8407e-01, -1.5028e-01,
        -9.5641e-02, -2.5871e-01,  2.9841e-01, -9.5556e-02,  3.3958e-02,
         3.1888e-02, -1.1818e-01, -1.0074e-01,  3.3591e-01,  2.2856e-01,
         4.6632e-01, -2.1087e-01, -1.7680e-01, -1.0743e-01, -7.3903e-02,
         2.3520e-01, -5.5256e-02,  1.0692e-01, -4.4602e-01, -7.1416e-02,
         2.3092e-02,  1.2411e-01, -1.8387e-01,  3.6896e-01, -7.0747e-02,
        -4.8156e-02,  1.0694e-02,  4.4627e-02, -1.6228e-01,  3.5757e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1420e+00, -7.0219e+01,  2.2414e+01, -5.5503e+00,  8.0584e-01,
        -4.7618e+00,  1.0483e+00, -6.9566e-03,  2.1944e-01,  1.4321e+00,
        -7.0424e-01,  2.5128e+00,  1.9130e+00,  1.3455e+00,  2.4936e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8991e+00,  6.0136e+01,  2.9252e+00, -4.2091e+00, -1.2721e+00,
        -1.0647e+00,  1.3958e+00,  2.0024e-01, -3.3696e-01,  1.2567e+00,
         3.2303e-01, -5.3999e-01,  3.3359e-02, -5.1145e-01, -9.8133e-02,
         1.1593e-01,  7.4422e-02, -1.8870e-01, -6.7919e-01, -2.9542e-01,
        -2.8135e-01, -7.9243e-01, -8.2328e-02, -7.7827e-01,  1.5658e-01,
        -1.8140e-01, -1.6704e-01, -1.1906e-01, -1.4987e-01, -1.3541e-01,
        -6.4933e-02, -3.6883e-01,  1.6974e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1480e-01, -2.4281e+01, -5.9890e-01,  1.8380e-01,  2.3866e-01,
         2.1428e-01,  1.9223e-01, -2.5476e-01, -9.3790e-01,  1.9199e-01,
        -7.6449e-03, -4.5963e-01, -1.5016e-01, -2.1432e-01,  1.9937e-01,
         1.4345e-01, -1.1226e-01, -7.0600e-02,  6.0682e-01, -1.5992e-01,
         3.0147e-01,  2.0562e-01, -3.9599e-01, -9.4952e-02, -5.2583e-02,
        -1.9585e-01,  2.2868e-01,  9.4472e-01,  6.1342e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.8329, 13.3010, -0.0427, -0.6023, -0.6220, -0.3751,  0.0980, -0.2103,
        -0.0726, -0.9499,  0.0345,  0.0929, -0.0792,  0.3473, -0.7871,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3015e-01,  5.0774e+00,  9.0783e-02, -2.9918e-02, -8.7506e-02,
        -6.0495e-03,  3.1075e-02,  3.1916e-03,  8.8558e-02,  1.1978e-02,
        -3.5460e-02, -1.6876e-03,  4.8930e-02, -3.5181e-02,  2.9109e-03,
         1.1622e-02, -1.8613e-02,  2.5636e-02,  3.7905e-03, -1.1810e-02,
         6.3564e-03,  3.1341e-02, -1.4969e-02, -1.0323e-02, -8.9585e-03,
         1.6492e-03, -1.0092e-02,  3.5740e-02, -6.8419e-03, -1.3811e-03,
         9.4228e-03, -1.7423e-02,  8.8450e-03, -2.7528e-03, -5.3766e-02,
        -3.9762e-02, -2.3319e-02,  6.4708e-03,  3.0788e-02,  2.0696e-03,
        -1.4730e-02, -4.4218e-02, -3.7057e-03,  5.4025e-03, -5.5679e-03,
        -8.5267e-04,  8.3821e-03,  1.7821e-02,  1.0656e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0204e-01,  4.0044e+00,  2.2899e-02, -1.4012e-01, -1.5568e-01,
        -2.9015e-02,  2.2562e-02, -4.5894e-02, -1.0680e-01, -7.5127e-02,
        -1.5846e-02, -3.1188e-02,  5.7426e-02, -3.1315e-03, -4.2871e-02,
        -3.8860e-02,  1.0543e-01, -4.6198e-02,  9.5391e-03, -3.0428e-02,
        -1.0681e-01, -8.2579e-02, -1.0217e-02, -2.3572e-02,  8.6131e-03,
         2.6708e-02, -2.2038e-02,  1.1495e-01,  2.7316e-02, -4.4092e-02,
        -4.5654e-02, -5.4124e-02, -1.0426e-01, -1.8231e-01, -3.3149e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7354e-01, -1.7749e+01, -1.3403e+00,  4.4773e-01,  5.5097e-01,
         5.0344e-02, -7.7145e-03, -6.7446e-02, -9.9907e-02,  1.1537e-01,
         3.8750e-01, -6.1294e-02,  2.4281e-01, -3.6685e-02,  1.6462e-02,
         1.6922e-01,  1.7017e-02,  1.3338e+00,  5.4181e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7971e-01, -1.9607e+01,  6.0745e-01, -6.4170e-01, -2.2537e-01,
        -5.8982e-01, -5.0384e-02,  3.1710e-01, -5.1523e-01,  6.5293e-03,
         1.8897e-02,  1.2616e-01, -4.9080e-01,  1.2101e+00, -5.5152e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5338e-01,  1.2026e+01,  7.3248e-01,  6.0621e-02, -1.2733e-01,
        -7.8212e-01,  2.9706e-01, -2.2300e-01,  2.9894e-01, -7.7782e-01,
         8.9899e-02, -1.9607e-01, -1.0204e-01, -1.8499e-01,  7.3594e-02,
        -1.4393e-01, -2.5967e-02,  9.5518e-03, -1.2107e-01,  3.3975e-01,
        -7.2660e-01, -1.4666e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3567e-01,  1.2350e+01,  2.4354e-01,  2.1749e-01,  1.7800e-01,
         9.0055e-02, -2.1859e-01,  8.8172e-02, -4.4921e-01,  4.8678e-02,
        -9.7876e-02, -6.3044e-02,  6.1924e-02,  5.0558e-02, -1.1865e-01,
        -1.7840e-02,  1.0393e-02, -3.4751e-02,  8.9363e-03,  1.5899e-02,
         3.8093e-02,  3.9602e-02, -5.4272e-02,  2.1174e-01, -2.7761e-02,
         3.5336e-03,  1.6322e-02,  4.9870e-02,  3.4060e-02, -3.0202e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2552, 12.3694, -0.7992, -0.6152, -0.4816, -0.2081, -0.4668, -0.2787,
         0.8212, -2.9637, -1.1318,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4223, 14.5643,  0.2030, -0.4315,  0.0555, -0.3088,  0.2569,  0.2931,
         0.5844,  0.1373, -0.7608,  0.0259,  0.3345,  0.0653,  0.0746, -0.1083,
         0.2605,  0.1143, -0.0630, -0.0656,  1.6933,  0.4633,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1395e+00, -5.8097e+01,  8.1673e-01,  1.3817e-01, -5.7142e-01,
        -5.8959e+00, -4.5496e-01,  1.7500e+00,  1.1705e+00,  3.1093e-01,
         1.4335e+00,  3.7752e+00, -8.3806e-01,  3.5484e-01,  2.9612e-01,
         1.1838e-03,  8.6794e-03,  2.4753e-01, -6.3808e-01,  4.1992e-02,
        -1.1159e-01,  3.6406e-01, -8.3415e-03,  1.1304e+00, -8.3772e-02,
        -1.9334e-01, -1.1550e+00, -4.9324e-02, -1.6097e-01,  1.4692e+00,
        -1.0194e-01, -1.0099e+00, -1.7160e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1106e+00,  3.9947e+01,  3.1255e+00, -1.4293e-01, -1.6091e+00,
         6.2099e-01,  3.5970e-01, -9.5893e-01,  2.1371e-01, -2.2187e-01,
         1.8715e-01,  2.3191e-01,  3.9891e-02, -3.9161e-01, -1.2898e+00,
         1.5478e+00,  5.0315e-01, -8.4602e-01, -3.6308e-01,  1.2347e-02,
         3.8319e-01, -5.9180e-01,  5.6036e-01, -3.9343e-01,  1.9076e-01,
         4.1182e-01, -2.0276e+00,  1.6634e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4961e+00,  5.4722e+01,  5.9616e+00,  3.0195e+00,  2.4908e-01,
        -1.1236e+00,  1.9531e+00,  7.9222e-03, -1.0134e-01, -4.0570e-01,
        -1.0374e+00, -1.7965e-02, -1.3478e+00,  3.8704e-01, -6.0027e-01,
        -1.1950e+00, -7.8426e-01, -5.0426e-01, -1.7724e+00, -1.1049e-01,
        -1.0729e+00,  1.7672e-02, -1.7306e+00, -6.9684e-01, -2.4397e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 7.3589e-02,  4.0489e+00,  3.0947e-01,  2.6325e-02, -6.7816e-02,
         1.7524e-01, -5.9893e-02, -2.9722e-02,  3.1477e-03, -5.2509e-02,
         2.7221e-02, -1.1950e-02,  5.1128e-03, -6.0395e-02, -3.0528e-02,
        -1.1195e-01, -6.1270e-02,  6.9779e-03, -4.5257e-03, -4.7170e-02,
         7.8898e-03,  2.6365e-04, -2.4175e-02,  4.7589e-02, -3.6067e-02,
        -6.6294e-02,  2.6321e-02, -2.8813e-02, -2.7517e-02, -5.1602e-02,
        -1.5353e-02, -6.1695e-02, -4.1182e-02, -4.7291e-02, -1.4043e-02,
         4.6458e-02, -4.5334e-02, -1.3316e-02,  2.7758e-02,  1.8507e-02,
         1.4345e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1927e-01,  1.8869e+01,  1.7403e+00,  2.3854e-01,  6.5918e-01,
        -1.3620e-01,  1.1818e-01,  2.6311e-01, -3.0741e-02, -4.9911e-02,
        -1.6637e-01, -4.0752e-02,  2.2560e-01, -1.0019e-01,  1.0701e-01,
        -1.0480e-01,  6.4118e-02,  6.0262e-02, -1.8560e-02,  6.3993e-02,
        -1.7620e-01, -3.3993e-01, -1.6907e-01, -3.1207e-01,  2.6216e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9867e-01,  2.4182e+01,  5.2751e-01, -1.5132e+00, -1.7837e-02,
        -6.9267e-01, -1.1352e-01,  2.4180e-01, -2.7882e-01,  1.5825e-01,
        -7.9375e-01,  1.5360e-01,  1.1266e-01,  1.1126e-01, -2.9494e-01,
         4.1846e-01,  2.5248e-02,  3.9106e-01, -1.6026e-01,  3.7535e-01,
         1.2685e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9735, 19.3949,  0.9076, -0.0258,  0.2125, -0.4804,  0.5680, -0.1628,
         0.0578, -0.5241, -0.1553, -0.1611, -0.2545, -0.5451, -0.0379, -0.0900,
         0.0487, -0.1133,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2448e-01,  1.9851e+01,  5.6991e-01,  7.6247e-01,  4.8835e-01,
        -3.7460e-01,  1.1973e-01, -2.4670e-01, -8.6290e-02,  1.7115e-02,
        -3.1695e-01, -5.6432e-03, -1.2879e-01, -2.4953e-01, -1.7562e-02,
        -8.9051e-02, -2.8521e-01, -7.7431e-02,  1.1629e-02,  1.9963e-01,
        -7.2616e-02,  2.6364e-03,  1.7225e-01,  1.3879e-01, -2.0568e-01,
        -5.3910e-02, -1.5458e-01, -3.5292e-03, -8.4523e-03, -2.2985e-01,
        -1.0444e-01,  8.0193e-02,  1.3329e-01, -1.6072e-01, -5.6805e-02,
        -2.9641e-02, -1.4795e-01,  9.5021e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2975e+00,  1.3508e+01, -7.7885e-01,  2.3189e-01, -7.7907e-01,
        -3.7059e-01,  2.9211e-01,  1.2327e+00,  1.7006e-01,  4.6694e-01,
        -2.6219e-01,  5.5337e-03,  2.7845e-01, -5.7155e-01,  2.4599e-01,
        -1.9261e-02, -5.4872e-01, -1.0230e+00, -5.2273e-01, -1.1251e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8644, 34.6314,  2.5314,  0.6063,  0.3645, -1.0903,  0.0775,  0.5304,
         0.7110, -0.3057,  0.1873, -0.3576,  1.7711, -1.6071,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0340e-01,  3.0150e+01,  1.1724e+00, -5.6228e-01, -6.4891e-01,
         1.2977e-01, -1.6008e-01,  2.4477e-01,  5.2591e-01, -8.3052e-01,
        -1.6604e+00, -7.3100e-01, -9.9341e-02,  1.4407e-01, -7.3098e-01,
        -5.7666e-01,  1.0593e+00,  5.5873e-03,  7.1080e-01,  5.5398e-02,
         7.1748e-01,  7.5590e-01,  6.7617e-02,  9.5504e-01,  3.0813e-02,
        -3.7233e-01,  7.5870e-02,  8.6067e-02,  3.7159e-02,  7.3354e-01,
         4.6618e-01,  4.4802e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3273e-01,  9.6400e+01,  2.0867e+00,  1.1490e+00,  1.5952e+00,
         4.8257e-01,  1.2064e+00, -2.3329e+00, -1.2748e-01, -8.7312e-01,
        -9.0971e-02, -9.1163e-01,  5.3259e-01, -1.3821e+00,  1.2239e-01,
        -3.8482e-01,  1.7585e-01,  9.5788e-01, -3.5106e+00,  4.5615e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8326e-01,  8.3235e+00, -7.5330e-02,  1.4300e-01,  4.5028e-02,
        -1.2389e-01, -5.7067e-02, -3.7346e-01, -1.9608e-01, -3.2789e-02,
        -1.3036e-02,  5.9338e-02, -5.9573e-02,  4.3586e-02,  2.6657e-02,
        -8.9370e-02, -9.7029e-02, -4.6021e-03, -6.7487e-02,  7.6681e-02,
        -2.7406e-02,  1.8768e-01, -3.1211e-02, -1.2234e-01, -2.9464e-02,
        -8.9583e-02, -1.8544e-01, -1.8391e-01, -4.2928e-02, -9.5111e-02,
         2.2764e-01,  1.0655e-01,  3.4984e-01,  2.3061e-02,  4.5411e-02,
        -6.7770e-02, -3.6153e-02, -1.1117e-02, -3.8803e-02, -5.9381e-02,
        -1.7374e-01,  5.2257e-02,  2.8804e-01,  1.3488e-01,  1.7096e-01,
         4.5374e-01,  4.5452e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0181e-01, -1.1808e+01,  8.3500e-03, -1.1937e-01, -2.5331e-01,
        -2.2516e-01, -2.3734e-01,  1.4756e-01, -9.3890e-01, -6.9564e-01,
         2.2822e-01, -5.7868e-01, -1.0271e-01, -2.8515e-01, -8.2334e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1285e+00,  5.4253e+01,  1.8087e+00,  9.4761e-01, -5.1798e-01,
        -2.4965e-01,  4.6088e-02,  2.1177e+00, -8.7030e-01, -5.4149e-02,
        -1.9773e-01, -4.1721e-01, -2.9442e-01,  1.2322e+00, -1.2316e+00,
        -4.1731e-01,  8.1653e-02, -3.7102e-01, -2.4836e-01, -4.5252e-02,
        -1.5079e-01, -2.1725e-01,  2.7182e-01,  1.0079e-01,  9.0897e-02,
         1.7742e-01,  1.0637e-01,  9.7496e-01,  3.9434e-01, -2.1647e-01,
        -4.7025e-01, -2.7326e-01,  4.0809e-01,  6.8187e-02,  1.4294e-01,
        -9.3031e-02, -1.0082e-01, -5.4044e-02, -3.8363e-02,  2.0595e-01,
        -1.6248e-01,  3.8123e-02, -3.2045e-01,  1.4972e-02,  4.3246e-02,
        -5.3167e-01, -2.8029e-01,  1.0792e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.8278, 16.3428,  0.7627, -0.1848,  0.0302, -0.2337,  0.2125, -0.2841,
        -0.0347,  0.0370,  0.0814, -0.0296, -0.1608, -0.1838, -0.1203, -0.0214,
        -0.4096, -0.2469, -0.1322, -0.1578, -0.0826,  0.0174,  0.0451, -0.0875,
         0.1893,  0.0562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9561, 24.0172,  0.9589, -0.8543,  1.2222, -0.3529,  0.1821,  0.0583,
        -0.3536,  0.1874,  0.1342, -0.4200, -0.2221, -0.4090, -0.1327, -0.0939,
        -0.5568, -0.4655, -0.3240,  0.0805, -0.9294,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6779e-01,  8.2401e+00,  3.0664e-01, -5.5366e-02,  3.1576e-01,
        -4.5453e-02, -1.4250e-02,  1.4307e-02, -3.5235e-03,  2.1823e-02,
         4.0406e-02,  1.1458e-02, -2.2734e-02, -9.4355e-02,  4.3571e-02,
        -2.6435e-02,  9.2629e-03, -7.6482e-02,  2.3296e-01,  5.4763e-02,
        -6.9793e-02,  8.2645e-03,  1.1782e-01,  9.7193e-02,  8.9076e-02,
        -2.4388e-02, -2.6528e-02,  3.1871e-02, -1.7250e-03, -6.3294e-02,
        -7.4451e-02, -1.4118e-02,  1.6662e-01,  5.0826e-02,  9.9183e-03,
         1.9042e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1808, 17.9607, -0.5919, -0.2166,  0.1812, -0.5624, -0.6094, -0.0751,
        -0.3346, -0.2299, -0.6425, -0.0303, -0.1338, -0.7625,  0.1288,  0.2097,
        -0.2774, -0.0260, -0.3504, -0.7696,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.5185, -37.8465,  -1.8285,  -0.7884,  -1.1822,   0.4751,  -0.8931,
          0.1311,   1.0537,   0.5185,  -0.7313,   0.8766,   1.4620,   0.6488,
          0.9597,   1.1070,  -0.0701,   0.5596,   0.3830,   2.1616,   1.8622,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9913, 18.8234,  0.8212, -0.0582,  1.0358,  0.0399, -0.0743,  0.4068,
         0.3861, -0.2410,  0.0210, -0.0739,  0.4463,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2090e-01, -4.1280e+01, -1.6461e+00,  7.0583e-01,  3.8911e-02,
         1.6832e-01,  1.4112e+00,  4.7591e-02,  1.0694e-01,  5.7268e-01,
         7.3189e-01,  2.0816e-01,  1.8989e-01,  6.3068e-02, -1.4633e+00,
        -6.3130e-01,  7.6949e-01,  3.7681e-02,  1.1382e-01,  2.9024e-01,
        -1.1496e-01, -4.6564e-02,  2.4880e-01, -4.1647e-01, -2.0867e-02,
         6.2141e-02,  5.6743e-02, -1.5230e-01, -4.1619e-02, -5.6904e-01,
        -1.4209e-01, -3.9892e-02, -4.2389e-02,  4.3081e-01,  9.5876e-02,
         6.6527e-01, -1.0925e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2116e-01,  2.8631e+00, -8.3445e-02, -1.6024e-03, -1.3686e-01,
         4.9091e-02,  1.9089e-02,  5.0249e-02, -4.4401e-02,  1.0291e-02,
        -7.8889e-02, -4.0066e-02,  5.2025e-02,  6.3282e-03,  4.7453e-02,
         2.4476e-02,  3.0694e-02, -4.0774e-02, -3.0973e-04, -1.3735e-03,
         5.2091e-02,  3.0212e-02,  2.1247e-01,  1.6694e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6945e+00, -4.0750e+01, -5.6827e+00, -4.2002e-01, -1.0638e+00,
        -7.3753e-01,  3.3745e-03,  4.2162e-01,  5.5570e-02, -7.0366e-01,
         1.3061e+00,  3.0507e-01, -7.7007e-01,  1.3573e-01, -8.0882e-02,
        -8.6225e-01, -2.2899e-01, -3.2460e-01, -1.8435e-01,  1.3485e-01,
        -2.0594e-02, -5.2781e-02, -3.8403e-01,  2.4799e-01, -1.7632e-01,
         1.2730e-01, -4.7635e-01,  7.0167e-01, -1.9019e-01,  3.7547e-01,
         1.0076e+00,  1.0274e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4533e-01,  2.1686e+01, -1.4092e-01, -1.1434e+00, -4.4650e-01,
        -1.2517e+00, -2.5956e-01,  9.3722e-02,  6.6253e-01,  4.6040e-01,
        -1.3730e-01, -5.9111e-02, -2.5084e-01, -1.4109e-02, -4.0859e-02,
        -5.4795e-02, -4.0752e-01, -5.4226e-02,  3.2446e-01,  3.6244e-01,
        -1.8585e-01,  5.2947e-01, -1.4147e-01, -1.3793e-01, -2.8769e-01,
        -2.7364e-03,  3.9097e-01,  1.0253e-01, -1.9868e-01, -5.8747e-01,
         8.3720e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4720e+00, -5.6670e+01, -1.9119e+00,  1.7357e+00,  9.7721e-01,
         1.1732e+00,  2.0772e+00,  1.9507e+00,  5.3992e-01, -8.0236e-01,
         6.9929e-01, -2.5622e-01, -1.0955e+00,  2.8605e-01,  6.7400e-03,
        -1.3154e+00, -1.8681e-01,  1.4587e-01, -4.5640e-02, -2.3327e-01,
        -1.5301e+00, -1.6054e+00,  1.7906e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9936e+00, -3.6574e+01, -1.2373e+00,  3.4026e-02,  1.2518e+00,
         2.1421e-01,  1.4793e+00,  3.3440e-01,  1.5639e+00,  6.4048e-01,
         1.5138e-01,  2.5009e+00,  6.4613e-01,  9.6739e-01,  1.4070e-01,
         8.8515e-01,  5.8061e-01,  7.3931e-01,  2.8138e-01,  3.2447e-01,
         2.7517e-02,  5.7148e-02,  1.0925e-01, -1.3532e-01,  1.9652e-01,
         1.3754e-01, -3.3616e+00,  1.1222e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.2031e+00,  2.6978e+01,  8.1485e-01, -1.9130e-02,  3.1430e-01,
         5.5745e-01,  2.8958e-01,  2.6855e-02, -3.6382e-02, -2.4267e-01,
         3.5413e-01, -1.2104e-01, -6.3971e-02, -2.1123e-01, -1.3263e-01,
         2.4545e-01,  1.2269e-01, -2.2297e-02, -3.7275e-02, -2.4077e-01,
         6.4129e-01,  3.7650e-02, -4.8976e-01, -3.7561e-01,  5.8926e-02,
         1.1120e-01,  5.2986e-01,  2.1768e+00,  3.0331e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6257e-01,  7.5848e+01, -6.6509e-02, -9.8354e-01,  6.4901e-01,
         2.4162e+00,  8.9731e-02, -9.8656e-03,  2.9960e-01, -1.7708e+00,
        -1.2827e-01, -1.6109e+00, -3.9875e-01,  6.1082e-01, -9.5095e-01,
         1.1212e+00,  2.7140e-01,  3.2262e-01,  3.1655e+00, -1.2013e+00,
        -4.3612e-01,  3.0414e+00, -2.0449e+00,  1.0739e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7413e-01,  5.3062e+00, -2.2712e-01,  5.7685e-02,  1.5045e-01,
         4.1685e-02,  1.6664e-02, -5.9353e-02,  9.6156e-02, -1.0017e-01,
        -1.9086e-03,  2.5662e-01, -1.2467e-01,  4.3416e-03,  5.7477e-02,
         1.6591e-02, -5.4780e-02,  5.6797e-02, -1.2393e-01,  1.1449e-01,
         1.4552e-01,  2.2642e-02,  1.0235e-01,  1.4092e-02,  2.3582e-02,
        -3.7119e-02, -4.5732e-02,  6.1140e-02, -3.4132e-02, -4.2241e-02,
        -3.2289e-02,  2.3438e-02,  2.2884e-01, -4.8258e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8994e+00,  3.4824e+01, -1.1328e+00, -1.8121e-01,  6.3639e-01,
         8.9580e-01,  4.1088e-01,  5.8651e-01,  3.7465e-01, -2.0929e-02,
         8.6369e-01,  3.1923e-01,  5.0678e-01,  3.1705e-01,  4.6253e-01,
         4.1203e-01, -8.8166e-02,  1.7256e-01,  8.5368e-01, -2.2127e-01,
        -5.2294e-02,  3.7507e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0878e+00, -1.6353e+01,  2.5168e-01,  5.8518e-01, -3.6625e-01,
        -4.0463e-01,  5.1285e-01,  2.2988e-01, -9.7219e-02,  4.3130e-02,
        -2.1208e-01,  2.3828e-01, -5.7026e-03, -1.4243e-01,  7.4684e-02,
        -2.9641e-01,  2.2454e-01, -1.1646e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6075e-01,  1.1562e+01, -3.6657e-06,  2.9418e-02,  1.2381e-01,
         2.5992e-01,  3.9229e-02,  9.1613e-03,  1.7998e-01,  2.4873e-01,
         8.4621e-02, -2.4161e-02, -2.6871e-01, -1.6643e-01, -1.5738e-02,
        -3.0601e-03, -1.8937e-02, -2.2407e-01, -2.7158e-02, -6.2406e-02,
         1.7039e-01, -4.3760e-02, -8.4498e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4282, 12.3443, -0.1342, -0.6659, -0.2432,  0.0189, -0.0827, -0.1676,
         0.0555, -0.1411,  0.0422,  0.0573, -0.0281,  0.1027,  0.0877,  0.1422,
         0.3008,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0464, 28.2276,  2.8118,  0.1362,  1.5108, -0.7963,  0.3697, -0.3827,
        -0.0973,  0.0529,  0.0290, -0.0428, -0.3120, -0.1280,  0.2369,  0.2933,
         0.8534, -2.1280,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.7962, -18.1147,   0.1305,   0.2760,   0.1716,   0.6757,   0.4770,
          0.3940,   0.3269,  -0.7161,   0.0578,  -0.2100,   0.1711,   0.2416,
          0.1983,   0.0209,   0.1028,   0.4051,  -0.1704,  -0.0864,   0.1789,
          0.1849,   0.1215,  -0.1526,   0.4282,   0.5055,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5396e+00,  1.7005e+01,  7.3095e-01, -9.7811e-01,  2.2181e-02,
         1.0948e-01,  2.5190e-01,  6.5355e-01, -2.3595e-01, -5.6744e-01,
        -2.2187e-01,  1.8258e-01,  1.5790e-01,  2.1754e-01, -2.2482e-01,
         1.5561e-02,  7.5295e-02, -1.2292e-01,  1.6370e-01,  7.4812e-02,
         2.5897e-01,  6.4852e-03,  2.4960e-02,  1.2831e-01,  1.4114e-01,
         3.6436e-01, -1.2749e-01,  9.5025e-02,  6.4901e-01, -8.2265e-01,
        -1.7472e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1371, 33.0558,  0.8843, -0.1293,  1.5283,  0.7833,  2.1461,  0.7042,
        -0.1059,  0.6650,  0.5188, -0.5734,  0.4570,  0.2190, -0.2552, -0.0789,
         1.2355,  0.5607,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0849e+00,  3.4471e+01, -1.1362e+00,  1.0858e+00, -3.5284e-01,
         1.7328e+00, -4.3272e-01,  4.0110e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.1907e-01,  9.9880e+00,  3.7161e-01, -2.6698e-01,  1.0013e-01,
         2.4879e-01,  1.6575e-01,  1.2250e-01, -1.7116e-01,  4.5829e-02,
         3.0723e-02, -1.3058e-01,  1.1909e-01,  2.6096e-02,  1.2969e-01,
        -1.9852e-03, -8.7582e-03,  5.5797e-02,  2.3233e-02,  3.7369e-03,
         3.3403e-01, -4.7340e-01,  1.5929e-02, -4.2714e-02,  5.2121e-02,
         5.9430e-02, -2.2779e-02,  5.9756e-02, -1.0613e-01, -4.8818e-02,
         8.7494e-02,  2.1843e-02, -1.5672e-02,  3.3559e-02,  5.2836e-02,
         6.0735e-02,  3.1247e-02, -4.9708e-01,  1.1752e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0518e+00,  1.9409e+01,  3.0853e-01,  1.0035e-01,  5.4046e-01,
        -2.6080e-01,  5.2311e-01,  1.2432e-01,  1.1283e-01, -4.3994e-01,
         2.9129e-01,  9.0150e-02,  1.7912e-01,  7.7603e-02,  9.6718e-02,
         1.0117e-01, -1.9312e-01, -2.3424e-01,  1.5810e-02,  1.9742e-01,
         2.3282e-01, -7.0049e-02, -4.0776e-02, -2.7120e-01, -2.1870e-01,
        -3.2917e-01,  1.7118e+00, -5.5266e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.3097, -11.9326,   0.4540,   0.5408,   0.2464,   0.2488,   0.0372,
          0.0761,  -0.2197,  -0.1561,  -0.2685,  -0.5684,  -0.4187,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5452e-01,  1.8129e+01, -7.5623e-01,  4.3929e-01, -2.8250e-01,
        -6.3909e-02, -4.1569e-01,  4.8091e-03,  8.2764e-02,  3.2340e-02,
        -4.4531e-01, -3.4689e-01,  3.0709e-01,  5.6433e-01, -2.7157e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1793e-01,  5.9772e+00, -5.2886e-01, -5.5810e-01, -2.6753e-01,
         8.3919e-02, -3.9796e-02, -5.8373e-02,  6.1555e-02, -8.2537e-03,
         1.9309e-02,  8.4579e-02,  1.1558e-01,  2.2007e-01,  4.3424e-02,
         3.8154e-02, -1.3280e-01, -3.3301e-01,  4.4927e-02,  7.8639e-02,
        -1.1566e-01,  3.3941e-02, -5.9026e-02,  4.3452e-02,  1.4597e-01,
         2.9722e-01, -2.3905e-03,  1.7454e-02, -8.6138e-02,  8.5925e-02,
        -9.8362e-02, -2.9486e-02,  1.6699e-01,  4.5233e-01,  3.5700e-02,
        -1.6289e-01, -1.1731e-01,  9.1388e-02, -5.5488e-02,  5.6411e-02,
        -1.7548e-02, -3.3662e-02,  2.4075e-02,  1.7141e-02,  3.6341e-02,
         1.4386e-01, -5.5182e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5094e-01,  3.5085e+01,  2.9581e+00, -9.0280e-02,  1.2802e+00,
         6.7098e-01, -3.7590e-01,  4.7983e-01,  7.2489e-01,  4.0500e-01,
         4.0347e-01,  2.2456e-02,  3.3241e-01, -1.0418e-01, -6.8126e-02,
        -4.0996e-01, -9.0193e-02, -2.0274e-01,  3.5593e-01,  2.2678e-01,
        -1.8201e-01, -4.0431e-01, -3.6979e-01, -2.9509e-01,  4.1417e-01,
         3.1035e-01,  2.4006e-01,  5.7793e-02, -3.5766e-01,  1.5710e-01,
        -3.6986e-01, -2.4333e-01, -9.7793e-03, -4.7493e-02, -6.8068e-02,
        -3.7841e-02,  2.3111e-01, -3.3960e-02, -1.5212e-01, -4.0039e-03,
         9.9668e-01, -1.1586e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7930e-01,  1.4262e+01,  9.7697e-01,  2.6213e-01,  3.0778e-01,
         3.3205e-01,  4.6539e-01,  1.9816e-01,  5.6411e-02,  4.3255e-01,
         5.8089e-01,  3.6752e-01,  5.6447e-02,  4.9578e-02,  1.7917e-01,
         1.4904e-01,  4.4489e-01, -2.9569e-02,  4.2944e-03,  8.4673e-02,
        -1.6552e-01, -1.0255e-01, -1.5908e-01,  4.7031e-02, -7.9489e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6807, 20.0253,  0.7056, -0.0510, -0.1809, -0.1821, -0.1735, -0.1078,
         0.1884, -0.1891, -0.1501,  0.0878, -0.0820, -0.3361, -0.2976, -0.2540,
         0.1854, -0.0672, -0.0468,  0.3391, -0.2716, -0.0543, -0.3361,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.3549, -30.5614,  -1.4299,   0.6581,  -0.6119,  -0.0411,  -0.2182,
         -0.5565,   0.4051,  -1.4988,  -0.2354,   0.2986,  -0.0988,   0.5329,
         -0.2920,   0.2664,   0.1150,  -0.3930,   0.2375,   0.4450,  -0.1080,
          0.2125,  -1.1897,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2934, 53.8796, -3.9824,  0.5452, -1.2113,  1.2736,  2.5370,  1.0845,
        -0.4028,  0.3917,  1.9844,  2.9708,  1.8258,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([16.5655, 46.0950,  1.8656, -3.0360, -1.1992,  0.2040, -1.6815, -1.9506,
         0.4984,  0.1088,  0.4252, -1.7693, -2.5555, -3.0056,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.7774, -37.1440,  -1.5739,  -0.7853,  -0.9895,   0.6671,  -0.4681,
          0.5944,  -0.2187,   0.4694,  -0.1854,   0.2196,  -0.6862,  -0.1875,
         -1.1606,  -0.3039,   1.6864,  -0.6312,  -0.6662,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.9550e+00,  2.1227e+01, -3.2074e+00, -1.2459e+00,  2.0439e-01,
         8.6279e-01, -6.7607e-02,  5.8422e-01,  6.9238e-02,  2.6082e-01,
         1.1594e-01, -2.3094e-01,  1.6081e-01,  2.0691e-01,  2.3052e-01,
        -3.7650e-01,  1.3793e-01,  1.7375e-01,  2.3211e-01,  5.4662e-01,
        -7.5931e-02,  1.5703e-02, -8.0417e-02,  1.0080e-01,  9.2533e-02,
        -8.2652e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8810e-01,  6.8023e+01,  3.1742e+00,  2.5213e+00,  2.1920e+00,
         1.0279e+00,  9.8970e-01, -4.7019e-01,  2.5926e-01, -9.0400e-01,
         1.5370e+00, -4.9026e-01, -5.4806e-02,  4.2965e-01, -1.8465e+00,
         5.6060e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4322, 34.5381, -4.0504,  2.3825,  0.7890,  1.1065,  0.4244,  1.4841,
        -0.2744, -0.5818, -0.2978, -1.2722,  0.2400,  0.8705,  1.1814,  3.1470,
         1.7878,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6022, 39.1875,  0.6755,  0.7145,  0.8345,  0.6748, -0.6635,  1.5338,
         1.0027,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3655e+00,  2.5557e+01,  1.8920e+00, -1.2163e+00,  7.3075e-01,
         1.2782e+00,  1.5460e+00,  4.8036e-01,  2.2798e-01,  2.5336e-01,
        -6.3930e-01, -9.9164e-01, -3.2332e-01,  3.3055e-02, -3.2269e-01,
         3.7688e-01,  6.8630e-01,  2.8656e-01,  4.3580e-01,  8.2797e-01,
         1.7317e-01,  1.8225e-01, -5.4723e-01,  8.1694e-03,  2.3934e-01,
        -2.9126e-01,  1.1057e+00, -4.5167e-02,  1.7734e-01, -1.7996e-02,
        -3.9802e-02,  5.5682e-01, -1.0527e-01,  7.1397e-02,  6.0315e-01,
        -2.0489e-01, -3.0312e-01,  4.4772e-01,  1.6755e-01, -1.2461e-01,
        -2.0425e-01, -4.0634e-02, -6.0954e-01,  1.3016e-01,  5.9738e-01,
         1.2026e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1015e+00, -4.5256e+01,  3.3075e-01,  1.9683e+00,  1.0222e-01,
         2.8182e-01, -1.6841e+00,  6.5519e-01,  1.8101e+00, -2.7743e-01,
         4.1734e-01, -1.7050e-02, -1.2575e+00, -1.3170e-01, -3.4901e-01,
        -5.8179e-01, -9.8457e-03, -2.9494e+00,  3.2768e-01, -3.3413e-01,
         7.7632e-01, -7.2196e-03,  9.5489e-01,  2.6106e-01,  1.0866e-01,
        -2.7146e-01,  3.2023e-02, -7.2378e-01,  1.3126e+00, -1.4257e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.1015, -27.4340,  -1.3301,   0.4845,   0.6200,  -1.0453,  -0.6311,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.2766, -25.1035,   0.8340,   1.3746,  -0.2562,   0.0901,  -0.1468,
         -0.6791,   0.6491,  -0.2484,   0.8832,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1715, 53.5946,  3.1269,  0.7251, -1.4842,  0.3501, -1.3186, -0.3602,
        -0.2205, -0.6436, -0.6901, -0.5045,  0.8054, -5.2628,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0450, 29.9242,  0.8558, -0.6345,  0.0980, -0.2261, -0.5037, -0.7586,
        -0.1235, -0.5178,  0.1648,  0.3110, -0.4744, -0.0929, -0.2982, -0.2847,
         0.1311,  0.0568, -0.2538,  0.6244,  0.2591,  0.2186, -0.3909, -0.2331,
        -0.0927,  0.1704, -0.2305,  0.1575, -0.2594, -0.2541, -0.3778,  0.4457,
        -1.4884, -0.0338,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4765e-01,  1.5706e+01,  6.7586e-01, -1.5046e+00, -2.6358e-01,
         2.6577e-01,  4.0736e-01, -3.6301e-01,  8.5311e-01,  4.8106e-01,
        -2.1804e-01, -3.4439e-01, -1.2701e-01,  1.5370e-01, -4.6295e-01,
        -2.0986e-01, -7.3831e-01, -3.1985e-01, -1.7093e-01,  9.0241e-02,
        -1.3584e-01, -1.3567e-01,  2.7674e-02, -2.9679e-03, -5.2612e-02,
         2.5210e-02,  4.8921e-02,  6.7555e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3532, 47.8185,  3.9920, -0.4879,  0.6630,  0.4904, -0.3492,  0.1741,
        -0.3630, -0.3926,  0.3136, -0.4392,  0.5823,  0.1130,  0.9216, -0.2791,
        -0.2938, -1.1410,  1.0152,  0.0481, -0.1280, -0.1116, -0.3520, -2.4338,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-8.2207e-01, -6.4315e+01,  1.7580e+00,  6.6029e-02,  1.5110e+00,
         1.7038e-01, -8.2717e-02,  9.3725e-01,  3.2433e-01,  2.9802e-02,
         2.4823e-01,  4.2640e-01, -8.2180e-01,  5.5562e-02, -3.6683e-01,
        -6.3311e-01, -4.5380e-01,  6.9156e-01,  7.3811e-01, -3.3666e-01,
        -8.6983e-01, -1.4705e-01,  5.2893e-01, -1.9732e-01, -3.8388e-01,
        -1.1323e-02,  5.7008e-01,  3.2854e-01,  2.4715e-01,  2.4715e+00,
        -2.5968e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -6.3977, -47.4682,   3.0757,   0.2938,  -0.8078,  -0.1943,  -0.3878,
          0.3280,   0.9257,   0.8735,  -0.5807,  -1.6519,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.2685, -43.0732,  -0.4157,   2.4128,  -1.3525,   1.0182,   1.6747,
          0.3232,   0.1059,   0.3330,   0.5644,   0.7095,  -2.3688,  -0.5142,
         -2.1058,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3649e+00,  2.9356e+01, -3.5807e-02, -9.3970e-02, -7.3797e-02,
         6.9698e-01,  4.8490e-01, -3.3271e-01, -1.4347e-01,  6.3830e-01,
        -5.6341e-01, -1.7613e-01, -2.4057e-01,  5.1408e-02,  2.8246e-01,
         3.2718e-01,  1.3176e-01, -5.6905e-01,  6.4346e-02,  4.9167e-01,
        -4.0033e-03, -1.2307e-01, -2.6870e-01, -5.1350e-02,  1.5462e-01,
         9.1297e-02,  1.7107e-01, -1.6559e-01, -3.7123e-01,  1.9373e-01,
         4.9709e-01,  3.5114e-02, -1.2664e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.7889, -15.1405,  -0.2849,   0.5261,  -0.3361,   0.0723,   0.2578,
          0.2146,  -0.3017,   0.1485,   0.2189,  -0.0583,   0.5774,   0.7115,
         -0.1539,   0.4817,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4073,  8.1029, -1.2201, -0.6709,  0.3595,  0.3807,  0.2880,  0.2882,
         0.2307,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2087, 13.9609,  0.4412,  0.8578, -0.5263,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0703e+00,  3.1903e+01,  1.8028e+00, -5.6918e-01,  5.5590e-01,
         8.3179e-01, -2.1079e-01, -6.2781e-01,  2.5604e-01, -3.4804e-01,
         7.0102e-01,  1.1131e+00,  2.9201e-01,  6.8517e-01, -2.3885e-02,
        -4.2399e-03,  8.3693e-02, -9.7436e-02, -7.7203e-01, -8.6808e-02,
        -1.9881e-02, -2.7747e-02,  9.4608e-02, -8.2669e-02,  8.9043e-02,
        -7.6529e-01,  2.7395e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0961, 41.1390, -0.3151,  2.4534,  5.1186,  5.1651,  0.4208,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1896, 30.5964, -1.6808,  0.4912, -0.2512, -0.1633,  0.5963, -0.3323,
        -0.0593, -0.0840, -0.1215,  0.2556,  0.1382, -0.4140, -0.4160, -0.1832,
        -0.1812,  0.3933,  0.4926, -1.1808, -0.9439,  1.0718,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8015e-01,  1.1289e+01,  5.0628e-01,  3.1756e-01,  3.7424e-01,
         7.2411e-02,  2.2647e-03, -1.7008e-02,  7.5351e-02, -2.5675e-01,
        -4.6247e-02,  6.1738e-02, -6.2033e-02,  1.7311e-01, -4.1483e-02,
        -9.1311e-02, -2.2970e-01, -1.9522e-01,  6.1150e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2040e+00, -4.9460e+01, -9.1982e-01,  4.1991e-01, -3.2280e-01,
        -9.8174e-03,  4.7994e-01,  5.2661e-02,  2.8944e-01,  3.5440e-01,
         6.8053e-01, -1.8465e-01,  1.7282e-01, -7.4280e-02, -2.4619e-01,
         1.9741e-01, -7.0820e-01,  1.2622e-01,  2.9594e-01,  1.2216e+00,
         2.3105e-01, -1.1126e+00,  1.2583e-01, -4.6866e-02, -7.7371e-01,
        -9.5978e-02,  3.8489e-01, -9.1419e-03, -5.6712e-02,  1.3030e+00,
         1.3069e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.1940e+00, -2.1319e+01,  1.6095e-01,  7.0459e-02, -3.5199e-01,
         5.5070e-02,  1.9728e-01,  1.2018e-01,  5.0652e-02, -1.0857e-01,
         8.9959e-02,  1.6506e-01,  7.8516e-01, -3.3496e-01, -2.7125e-02,
         2.7912e-02, -1.1106e-01, -1.1953e-03,  6.6212e-01, -4.7164e-02,
         2.9804e-01,  6.4168e-02, -2.2550e-03, -3.0077e-01, -1.0038e-01,
        -8.0515e-02,  2.1885e-01,  3.7866e-01,  3.7572e-02, -3.3665e-02,
         1.5644e-01,  5.4879e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9227e+00,  4.8556e+01,  1.9537e-01,  8.0891e-01, -3.4751e-01,
        -4.5175e-01,  3.0521e-02,  1.1561e-01,  1.5700e+00,  1.0472e+00,
        -1.2638e+00, -3.7613e-01, -1.7833e-01,  8.4326e-01, -8.5461e-01,
        -5.7275e-01,  4.4578e-01, -8.2335e-02, -3.5319e-02,  2.0806e-01,
        -3.7234e-01, -5.0586e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9659e-01,  1.0394e+01,  7.4958e-02, -8.7483e-02, -1.4804e-01,
        -2.5869e-01, -6.8823e-01,  4.5727e-01,  8.7492e-03,  6.6786e-02,
        -2.2238e-01,  2.9439e-02,  2.1474e-02,  4.8281e-02, -1.6654e-01,
        -1.7092e-01, -2.8557e-02,  1.9155e-01, -2.0920e-02, -1.1675e-01,
         3.4712e-02,  9.8775e-02, -3.5558e-02, -2.4482e-02, -7.5886e-02,
        -1.0253e-01, -8.8555e-02, -1.7711e-01, -1.2105e-01,  1.6056e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1269e-01,  4.5739e+00,  2.1527e-01, -3.8142e-02,  3.9945e-02,
        -5.4006e-02, -8.1851e-02, -2.0853e-02,  1.8722e-02,  7.3781e-03,
        -8.5454e-02, -7.5774e-02, -7.9949e-02,  8.0477e-02, -4.4023e-02,
        -1.0210e-01, -1.7888e-04,  4.8380e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4383e-02, -1.3067e+01, -1.9613e-01, -7.1280e-01,  2.0295e-01,
         3.8844e-01,  9.3259e-03, -2.4081e-01,  2.8605e-01,  2.3628e-02,
         1.1630e-01, -5.6342e-02,  5.5229e-02, -1.4084e-01, -6.7534e-02,
         2.2871e-01, -3.4842e-01,  2.5545e-02,  2.2146e-01,  1.8320e-01,
         6.8977e-02,  9.5938e-02,  8.5374e-02, -1.8964e-02, -1.0032e-01,
         6.5250e-02,  1.8045e-01, -4.4882e-02, -4.8518e-02, -6.0849e-02,
         7.3441e-02,  1.0108e-01, -1.7359e-01,  8.7484e-02,  6.8822e-02,
        -2.0409e-01, -4.7219e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1884e+00,  5.4727e+01, -1.0114e+01, -5.7035e+00, -2.0558e+00,
        -7.4911e-01, -8.8618e-01, -1.5007e+00,  2.3854e-01, -2.3117e-02,
         2.0949e-01,  1.9305e+00,  5.1077e-01, -1.9916e-02, -3.2734e-01,
         3.1435e-01,  6.0485e-01,  5.1579e-01,  1.7119e-02,  1.0355e+00,
         8.3474e-01, -9.5608e-01, -1.4438e+00, -7.8966e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3661e-01,  6.3082e+00, -3.4076e-02, -1.2224e-01,  3.1042e-02,
         3.6707e-02,  1.5685e-02,  1.3000e-02, -7.3198e-02,  2.8772e-03,
         3.7570e-02, -2.2421e-02, -2.3765e-03,  3.7465e-02,  5.8870e-02,
        -4.6490e-03, -5.6702e-02,  2.5381e-02,  1.4314e-02, -5.4702e-02,
         7.0732e-03,  3.8804e-02, -1.9179e-02, -4.7642e-02,  1.1021e-01,
         1.6136e-01,  2.5476e-02,  6.5704e-02,  6.2522e-02,  1.0744e-03,
         9.3406e-02,  4.3596e-03,  3.1462e-03,  3.5619e-02,  5.1430e-02,
        -8.7113e-02, -3.8833e-02, -4.6973e-02, -1.9887e-02,  4.0055e-02,
        -3.8139e-03,  2.8652e-02, -7.0456e-03,  9.8041e-02,  1.3970e-03,
        -2.0453e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5488, 21.5976,  0.3071,  0.2281, -1.2211,  0.4819, -0.9595,  0.2157,
        -0.2717, -1.1028,  0.2578, -0.0455,  0.3882,  0.1475,  0.3499, -0.0954,
        -0.1150, -0.9100,  0.0390,  0.4086,  0.8489,  0.0373,  0.2267,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  4.6828, -54.7523,   5.2918,  -4.0684,  -0.9088,  -1.2258,   0.3584,
          0.9134,   0.8600,   0.1397,   1.6989,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1956, 15.1328,  0.7121, -0.6258, -0.0172, -0.5006, -0.1677,  0.2022,
         0.3291,  0.8288,  0.3015,  0.1252,  1.5275,  0.3993,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5376, 25.1006,  0.2249, -0.6695, -0.8167,  0.4720, -0.4575, -0.0552,
        -0.4279,  0.3654,  0.4944,  0.2948,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5707e+00,  3.4132e+01,  2.4529e+00,  3.1447e-01,  7.1805e-01,
        -2.4187e-01, -1.7023e-01,  6.0138e-01,  1.5925e-01,  2.9844e-01,
         1.5356e-01,  6.2095e-02,  3.9256e-01,  3.4703e-01, -3.2064e-02,
         2.5925e-01, -2.7368e-02,  3.9073e-01,  6.7951e-02, -5.7117e-01,
         4.8366e-02,  5.6552e-02,  2.7068e-01, -2.7867e-02,  2.2688e-02,
         3.9689e-01,  6.2484e-01,  5.2669e-02, -7.3172e-02, -1.4968e-01,
        -2.2782e-01,  3.9331e-01, -1.6133e-01,  2.6019e-01, -2.4617e-02,
        -4.4020e-03,  6.5495e-03, -1.3328e-01, -8.2685e-02, -4.4292e-02,
         1.5669e-01,  1.6278e-01,  1.6604e-01,  2.3184e-02, -8.9491e-03,
         1.1149e-01,  1.4503e-01,  2.2067e-02,  7.0840e-02, -5.3758e-02,
        -3.5567e-02, -5.6568e-02, -1.6934e-01,  4.1902e-01,  1.4402e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ -1.2238, -27.0118,   0.0302,   0.3790,   0.0735,  -0.0540,  -0.4358,
          0.0782,  -0.2943,  -0.0938,   0.4757,  -0.3312,   0.0481,  -0.1076,
          0.1706,  -0.1853,   0.1509,  -0.7027,  -0.2152,  -0.4854,   0.4190,
         -0.2101,   1.0105,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6250e+00, -2.9104e+01, -5.7158e-01, -2.3142e+00, -7.3981e-01,
        -7.7693e-01, -4.0616e-01, -3.0326e-01,  2.4194e-04, -5.2373e-01,
         1.0251e-01, -3.9901e-01,  1.5733e-01,  5.8624e-01,  5.6473e-01,
        -1.1108e+00, -3.9319e-01,  3.2702e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.9237, -15.6915,  -0.4366,  -0.6422,  -0.2449,  -0.1987,  -1.1932,
         -0.3184,   0.1161,   0.3337,   0.1166,  -0.2079,  -1.2081,   0.1551,
          0.0692,  -0.2302,  -0.0767,  -1.2747,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2146e+00,  3.1316e+01, -1.1804e-02, -9.9457e-01,  9.7795e-01,
        -1.0724e+00,  1.8703e-01,  5.8334e-01, -1.2842e+00,  1.7062e-01,
         3.2066e-01,  1.4090e-01,  9.6045e-01,  4.5016e-02, -3.0249e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6649e+00,  2.3545e+01,  8.2263e-01,  1.1252e-01, -1.8485e+00,
         2.8481e-01, -4.8909e-01, -6.9981e-01, -6.7068e-01,  1.4857e-01,
        -8.6249e-02, -1.4916e-01,  1.3844e-01, -9.4395e-02,  3.6207e-02,
         7.5200e-01, -3.1680e-01, -2.9204e-01, -1.7533e-01, -1.1605e+00,
         1.6632e-02, -7.5569e-01, -1.3184e-01, -2.7593e-01, -2.4919e-01,
         6.2306e-02,  4.3280e-01, -3.3113e-02,  1.0013e+00,  5.6450e-01,
        -5.2846e-02, -2.0167e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4108e-01, -2.3076e+01, -8.8511e-01, -7.3521e-01, -2.2281e-01,
        -1.4782e-01,  1.3137e-01,  2.4705e-01,  1.0079e-02,  2.4525e-01,
         5.8960e-02,  2.4422e-01, -2.5611e-01,  3.5919e-01, -4.0102e-02,
         2.9583e-01,  4.7383e-02, -2.2201e-02,  6.7192e-02, -4.4369e-02,
         1.5501e-01, -7.1147e-02, -1.8841e-01,  1.3019e-02,  6.2554e-02,
        -3.7827e-01, -2.2523e-01, -2.7436e-01,  4.0164e-01,  2.9146e-01,
         4.9159e-01,  1.6174e-01, -1.7270e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5280e-01,  1.1561e+01,  3.7660e-01,  6.2647e-01,  5.5188e-01,
         3.1041e-01,  2.6859e-02,  1.1447e-01,  4.3914e-02,  2.6690e-01,
         9.6651e-02, -9.6243e-02, -9.3852e-03,  6.9705e-02,  3.1890e-02,
        -1.1277e-01,  1.6960e-01, -1.6419e-04,  9.6489e-02,  7.1097e-02,
        -2.8545e-02,  1.7686e-01,  1.8874e-02, -2.6776e-02,  3.8937e-02,
         1.3169e-03,  1.8758e-02,  2.2719e-03,  1.5411e-01,  1.5928e-01,
         1.4423e-01,  1.4200e-01,  4.8007e-02,  1.8981e-02,  2.9977e-02,
         1.2918e-03,  6.1245e-02,  1.0388e-02,  7.5030e-02,  1.7139e-01,
         1.9471e-02,  3.7920e-02,  1.6124e-03,  4.5313e-02,  1.0767e-01,
         4.5686e-02,  5.7072e-02,  4.0632e-02,  5.2158e-02,  1.4525e-01,
         4.6559e-04,  3.4855e-02,  7.8046e-02,  2.3900e-03,  1.2393e-01,
         2.1824e-01,  2.8870e-02,  6.8631e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7819e+00, -3.7632e+01, -1.7237e+00,  1.0310e+00, -1.1057e+00,
        -7.2419e-01,  5.4755e-02,  5.6971e-01, -3.4458e-02, -1.0607e+00,
        -3.2071e-01, -2.1883e-01, -2.0056e-01, -6.8558e-01,  2.2724e-02,
        -6.2966e-02, -4.4528e-01,  4.1957e-01,  3.1532e-02,  5.7817e-02,
         1.0632e-01, -3.9579e-01,  4.7586e-01,  2.8965e-01,  6.9919e-01,
        -3.5794e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4211e-01,  1.1845e+01, -2.6417e-01, -5.4992e-02,  5.7773e-02,
         1.4174e-02,  7.5064e-03, -1.1906e-01, -1.5353e-01, -1.3729e-01,
        -2.1992e-01,  2.1806e-02,  4.3966e-02, -3.7883e-02,  8.5633e-03,
        -2.0040e-02, -1.2554e-01,  2.2377e-02,  9.3015e-02,  1.0912e-02,
        -3.2556e-02, -2.8098e-01,  2.3458e-01, -9.7484e-02, -2.0484e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0752e+00,  1.6908e+01,  1.1394e+00, -3.6299e-01,  5.5415e-01,
         5.2356e-02,  3.1790e-02,  1.2186e-02,  5.8036e-01,  4.2933e-01,
        -5.7577e-01, -2.4174e-01, -3.2466e-01, -1.9144e-01,  1.0085e-01,
        -1.2187e-01, -1.1034e-01, -3.5783e-02,  2.5126e-02, -2.5422e-02,
        -2.0463e-01,  1.0759e-01, -1.2458e-01, -8.0410e-02,  2.5370e-01,
        -3.3646e-02, -2.9845e-02, -7.1663e-02, -2.4521e-02,  6.3493e-02,
        -1.3670e-01, -1.0747e-01,  2.7018e-01,  1.5107e-01,  3.0534e-01,
         5.3053e-02,  7.6146e-02,  1.3590e-01,  9.5910e-02,  1.9446e-01,
        -2.9648e-01, -4.3103e-02,  2.3387e-01,  8.3135e-01,  4.4706e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5426,  6.3561, -0.1590,  0.1966, -0.1451,  0.0531, -0.0989, -0.1625,
        -0.0337, -0.0484, -0.1280, -0.0430,  0.0240,  0.0411,  0.0245,  0.0782,
         0.0339,  0.0323, -0.0111,  0.0265, -0.1342,  0.4106, -0.1578,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6430, 31.7045, -4.0819, -1.4426,  0.4239,  0.6726, -0.1221,  0.3154,
         0.1564, -0.6311, -1.8973,  1.8765,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 6.9406e-01,  1.7376e+01,  6.9386e-01, -3.8648e-01, -3.1888e-01,
         1.6613e-01, -3.4728e-01,  5.6030e-03,  2.7041e-01, -3.2298e-01,
         1.9545e-02,  1.5181e-01,  1.5406e-01,  2.1330e-01, -3.8374e-01,
        -2.9170e-01, -4.5100e-02,  1.3349e-01, -3.0826e-02,  8.3680e-02,
         4.9325e-02, -3.2145e-01, -9.4105e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3101e-01, -3.7624e+01, -1.7804e+00,  5.2994e-01,  9.8080e-01,
         2.7162e-01,  4.8817e-01, -5.3117e-01,  7.0827e-01, -4.9902e-02,
         2.0885e-01,  5.0626e-01,  1.8891e-01, -8.4350e-02, -2.0141e-01,
         4.7909e-01,  2.9234e-01,  1.0662e+00,  2.5677e-01, -2.7358e-01,
         3.3726e-01,  7.0568e-01,  4.6725e-01,  7.6717e-01,  1.8543e-01,
        -2.3644e-02,  3.4837e-01, -1.0388e-01,  5.0864e-01,  1.6898e-02,
         1.7137e-01,  1.9669e-02, -6.2414e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1242e+00, -7.1549e+01, -3.9719e+00, -1.1370e-01,  2.3947e+00,
        -9.3566e-02,  1.0674e+00, -1.4176e-01, -8.2097e-01,  4.2759e-01,
         9.7018e-01,  3.2388e-01, -9.4458e-01,  1.3763e-01, -3.4181e-01,
         4.7996e-01, -2.1978e-01, -2.5143e-02,  3.3490e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8704e+00, -3.6021e+01,  3.5233e-01, -2.6977e-01, -8.2646e-01,
        -1.6426e+00,  2.1940e+00, -1.0318e-02,  9.1312e-01,  2.9906e-01,
        -1.4926e+00,  1.3862e-01,  1.5874e-01, -5.3162e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6560e-01, -3.9469e+01, -7.3442e-01,  8.9153e-01, -5.1544e-01,
        -3.1455e-01, -2.7992e-01,  4.2737e-01, -1.1479e-01, -7.4865e-01,
        -3.4693e-01, -1.4223e-01, -8.5570e-02,  1.2270e+00, -2.7131e-01,
        -1.9962e-01,  8.0901e-01, -1.4817e-02, -5.0333e-01,  1.4445e-01,
         7.3440e-01, -1.0264e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2600e+00, -4.8260e+01, -2.1209e+00,  2.0228e+00, -1.7942e-01,
        -6.8829e-01,  1.0640e+00,  9.5168e-01,  9.6007e-01, -1.7175e-02,
         1.2399e-01, -5.1492e-02,  1.1557e+00,  1.7648e-01,  1.6232e-01,
        -2.6549e-01, -1.6686e+00,  4.8969e-02, -7.3692e-02, -4.8312e-01,
        -1.8794e-01, -2.6020e-01,  3.7906e-01, -2.8263e-01,  4.3591e-01,
         4.9783e-02, -1.5453e-01, -4.9927e-01, -3.5993e-01,  5.0373e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6854, 29.2166, -0.0601, -0.1463, -0.2250, -0.6774,  0.3353, -0.0929,
        -0.3607, -0.0393, -0.5001, -1.1212, -0.9575,  0.3683,  0.5018, -0.9444,
        -1.1117,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8431, 14.2515,  0.4525,  0.5423,  0.0175,  0.0549,  0.0496,  1.7698,
        -1.2737, -0.3131, -1.2394,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8284, 65.8315,  3.1429,  0.3697, -2.6283,  1.5651,  1.1598,  0.8358,
         0.1369,  1.5049, -0.1536,  0.3292,  0.2799,  1.8690,  0.3221, -0.2772,
        -4.3962,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3885e+00,  2.5126e+01,  7.3003e-01,  1.7069e+00, -3.4595e-01,
        -5.0484e-01, -1.3188e-01,  6.7158e-01, -6.2903e-01, -1.5197e-02,
        -2.0219e-02,  1.6515e-01, -1.3231e-01,  5.9885e-01,  2.2474e-01,
        -1.8280e-02,  4.8653e-03, -6.6049e-02, -6.3747e-01, -4.1679e-02,
        -6.8647e-02, -4.7568e-01,  5.0452e-01,  2.9378e-01,  1.1210e-01,
        -1.2715e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0896e+00,  1.8171e+01, -1.3616e+00, -8.0659e-01, -2.9981e-01,
        -4.1494e-01, -2.0693e-01,  2.9073e-01,  8.2112e-02,  6.4148e-02,
        -1.1103e-01,  1.8979e-01, -6.8318e-02, -1.3301e-01,  1.7122e-02,
         4.7004e-01, -1.2036e-01,  5.7662e-02, -6.5163e-02,  2.0548e-01,
         1.5744e-01,  1.1435e-01, -8.1313e-03,  1.0348e-02,  3.1174e-01,
        -2.0319e-02, -1.0162e-01, -6.9499e-02,  1.1716e-01,  7.3807e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.1048, -14.4759,   0.7561,   0.7591,  -0.0193,  -0.3050,   0.2554,
          0.0960,   0.1687,   0.3504,   0.1033,  -0.0565,  -0.2252,   0.0271,
          0.0148,   0.1812,   0.3166,  -0.0429,   0.1447,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 7.1077e+00, -3.6830e+01,  2.0951e-02, -2.7770e+00,  8.8194e-02,
         5.3411e-01, -1.6063e+00,  1.2308e+00,  1.8933e-01, -3.8781e-01,
        -6.5101e-01, -2.5683e-01, -1.1163e-01,  7.8836e-03,  1.0614e+00,
        -2.3552e-01, -2.5979e-01, -1.1564e+00, -3.8582e-01, -1.7694e-01,
         8.5046e-03,  1.7057e-01, -1.3929e-01, -6.6466e-03,  4.5583e-01,
         1.4832e-01, -5.0954e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.3287, -43.0472,   1.4065,   2.7620,   0.4355,   1.0547,  -1.9119,
         -0.6494,  -0.4713,  -1.6362,   0.3601,  -1.6889,  -0.5616,   0.5395,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6437e+00, -3.3523e+01, -1.7339e+00,  3.1323e-01, -8.2855e-01,
        -1.2295e-01,  4.7353e-01,  1.8046e+00, -4.5259e-01, -1.0057e-01,
         4.8284e-01,  9.8549e-02, -3.0212e-01, -3.4790e-01, -5.7484e-01,
        -2.6224e-01,  4.2020e-01, -4.0987e-02,  3.2540e-01,  3.8572e-01,
         1.1214e-04, -9.5643e-02,  2.3183e-01, -3.0056e-02, -1.4625e+00,
         3.1342e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4843, 24.7892,  1.6592,  1.6556,  0.9757, -0.7316,  0.7104,  0.2236,
        -0.7731,  0.0669,  0.1006, -0.9014,  1.7020, -2.5863,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1704e-01,  1.5808e+01,  3.2062e-01, -5.2125e-01, -7.4227e-02,
        -2.1840e-01, -4.4007e-01, -2.7874e-01,  7.7108e-02, -1.0477e-01,
        -6.3422e-02, -1.4724e-01, -5.4129e-02, -3.4336e-03,  1.5330e-01,
         1.4733e-01, -4.7547e-01, -2.9999e-01,  6.2071e-02, -2.6073e-01,
        -1.3623e-02, -1.4019e-01,  3.2911e-01,  2.5856e-01, -6.0908e-02,
        -1.5855e-01, -3.0097e-01, -1.8377e-01, -2.1132e-01,  1.0452e-01,
         2.3695e-01,  4.3019e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2268e+00,  1.6077e+01,  4.6540e-01, -5.0528e-01, -6.2723e-01,
        -1.6446e-01, -1.5372e-01, -9.3333e-02, -3.7848e-03, -1.7243e-01,
        -3.5883e-02,  3.7122e-02, -1.3134e-01,  5.3600e-02, -1.8580e-01,
        -8.3086e-02,  9.2752e-02, -1.9490e-01,  1.1335e-01,  1.2287e-01,
        -6.6914e-03, -4.3709e-02, -1.0603e-01, -3.6303e-01, -5.1232e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2023e-01,  3.6451e+00, -4.1883e-04,  2.1422e-01,  4.2863e-01,
         1.7866e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3801e+00, -3.4911e+01, -2.6464e+00,  3.3920e+00, -7.7368e-01,
         1.2978e+00,  8.5546e-01,  1.1305e-01, -1.4941e-01,  4.7775e-01,
         4.2169e-01, -8.2449e-01,  7.5194e-02, -2.4976e-01,  3.7676e-02,
         1.8793e-01,  2.7783e-01, -4.1320e-02,  1.0503e-01,  1.4911e-02,
         6.1524e-02, -2.5275e-01,  2.4905e-01,  1.1453e-01, -4.5298e-02,
         1.9027e-03,  1.1621e-01,  2.0537e-02,  1.3298e-01,  1.5020e-01,
        -7.9130e-02,  6.5074e-03, -1.0009e-01, -8.3354e-03, -4.6194e-01,
        -1.0211e+00, -1.5203e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2298e+00, -1.8460e+01, -7.6345e-01, -1.3190e+00,  5.5660e-02,
        -4.8691e-01, -2.9713e-01,  4.1336e-01, -8.8209e-02,  3.8065e-02,
         9.1343e-03, -8.1138e-02, -2.5214e-01, -3.2774e-02, -4.5135e-02,
         3.4594e-01,  1.6549e-01, -2.2382e-01,  2.2173e-02, -1.1098e-01,
         4.8536e-01,  2.0973e-01,  4.3631e-02,  1.0024e-02,  7.2218e-02,
         1.5310e-01, -3.5723e-02, -1.5848e-01,  2.0660e-01,  7.2331e-02,
         1.2052e-01, -5.0766e-03, -5.7508e-02,  6.5653e-02,  3.4944e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1715e-01,  8.2444e+00, -2.6313e+00,  2.1803e-01, -2.3598e-01,
         3.2214e-01, -1.4863e-02, -2.8880e-02,  1.1525e-01, -6.9269e-02,
        -2.4867e-02,  4.7793e-03, -2.8743e-01, -1.6737e-01, -2.8766e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5156e+00, -2.3200e+01, -8.7758e-01,  1.2643e-01, -1.5428e-01,
        -2.0705e-02, -5.6264e-01, -2.5935e-01, -1.3063e-01,  2.3208e-01,
         1.5561e-01,  2.1591e-01, -7.8608e-02,  1.7295e-01, -4.6151e-02,
        -4.0739e-01, -1.9525e-01, -2.0057e-01, -4.1145e-02, -2.0624e-02,
        -1.2556e-02,  2.9218e-01,  8.6759e-02,  1.0262e-01,  3.0846e-01,
         1.3260e-01,  2.8661e-02, -1.1376e-01,  2.6330e-02, -2.5153e-02,
         6.2382e-02, -7.1778e-01, -1.6741e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0667e-01, -3.2391e+01,  1.1485e+00, -1.8138e+00,  3.2229e+00,
         2.4405e+00,  3.9574e-01, -2.8053e-01,  6.1163e-02,  4.8364e-01,
        -1.4880e-02, -1.2973e+00,  7.9870e-02, -3.9760e-01, -5.1491e-01,
        -3.0300e-01, -2.4705e-01, -1.5163e-01, -1.6478e-02, -1.1478e-02,
         3.4207e-01, -7.8990e-02,  9.8989e-02, -3.3434e-01,  3.5814e-02,
         1.9716e-01,  2.2494e-01, -4.0233e-02, -5.8807e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 2.1922e+00, -6.3075e+01,  2.2608e+00,  2.9500e+00,  9.0355e-01,
         1.0280e+00,  7.3108e-04,  3.7508e-01, -4.2612e-01,  3.1482e-01,
         7.9337e-02, -3.7136e-02,  7.3219e-01,  2.6991e-01,  1.8231e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5403e-01, -4.1444e+01, -1.5835e+00,  2.2043e+00,  8.0048e-01,
         2.4205e-01, -3.0759e-02, -8.8531e-02, -5.3454e-01,  2.8097e-01,
         3.0259e-01, -4.7475e-01,  3.3229e-01, -3.5602e-01,  1.5822e-01,
        -1.8854e-01, -1.2491e-01, -1.0280e-01,  1.0201e-02,  2.2231e-01,
        -3.7894e-03, -1.4841e-01, -5.8883e-02, -1.6157e-01, -1.0296e-01,
        -2.4705e-01, -2.7638e-01, -3.8323e-01,  8.1750e-01,  1.4772e-02,
         7.8980e-02, -7.5540e-03, -1.6045e-01, -1.2833e-01,  5.4718e-01,
        -9.7807e-02, -1.7689e-01, -1.0344e-01, -1.2764e-01,  2.4636e-01,
         2.4111e-02,  2.2339e-01, -3.4320e-01,  1.0339e-01, -2.6990e-01,
         1.0552e-01, -3.6776e-01,  4.9748e-01,  8.1226e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3121e-01,  8.5223e+00, -1.9657e-01,  1.1735e-01,  1.2908e-01,
         1.1759e-01, -1.4319e-02,  8.0968e-03, -9.4908e-03,  1.6150e-01,
         6.7176e-02, -7.1441e-03,  8.6693e-02, -4.6169e-03,  5.1151e-02,
        -4.8068e-02,  4.8889e-02,  5.6014e-02,  4.4333e-02, -3.8848e-03,
        -4.5886e-02, -5.1207e-02, -3.9837e-02,  1.8434e-02,  7.8758e-03,
         5.8517e-03, -1.4152e-01,  8.9831e-02,  1.8513e-02,  1.0439e-02,
         7.2338e-03, -6.5428e-02, -1.1457e-02,  5.1203e-01,  1.9346e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8243e-01, -1.1086e+01, -2.4397e-01, -6.5202e-01,  4.8093e-03,
        -1.2186e-01, -1.0326e-01,  1.0430e-01, -2.1778e-01,  7.8527e-02,
        -1.4247e-01, -7.5314e-02,  9.5674e-02, -9.8108e-02, -2.7663e-01,
        -8.3117e-03, -3.3836e-02,  1.7585e-01, -3.2430e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.0235, -20.8901,   0.2595,  -0.6224,  -0.5432,  -0.0681,  -0.4958,
         -0.3387,  -1.0178,  -0.1206,   0.2696,  -0.2304,  -0.2821,   0.6403,
          0.1650,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.1061, -38.0209,   0.5175,   0.3682,  -0.5677,   0.4495,   0.8119,
          0.6903,  -0.1210,   0.4197,   1.1086,   0.5614,   0.8695,   0.7675,
         -0.9383,   1.3598,   0.1014,   0.2465,  -0.0950,  -0.8823,   0.4861,
         -1.0516,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0017e-01,  3.7870e+01, -9.6563e-01,  1.1363e-01,  7.2787e-01,
        -5.7410e-01, -1.3889e+00,  2.6256e-01, -3.0397e-01,  1.5478e-01,
        -3.2031e-01,  3.2675e-01, -1.5611e-01, -2.4041e-01, -4.4357e-01,
        -1.0053e-01, -2.6113e-01, -7.0138e-02, -5.5562e-02, -2.4554e-01,
         5.5873e-01, -3.4897e-02, -9.2937e-02, -7.0182e-02, -5.6001e-01,
        -2.4406e-01, -7.3375e-02, -3.3816e-01, -2.3651e-01,  9.7368e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5760, 45.7555, -4.5851, -1.8810, -0.5494, -1.6616, -2.4501,  0.3666,
         0.6034,  1.3825,  1.1333,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3972e-01,  6.6280e+01,  9.7029e-01,  1.2131e+00,  6.1886e-01,
        -1.2393e+00, -8.0805e-01, -1.7327e-01,  1.0810e-01,  1.6196e-01,
        -1.2158e+00, -4.6769e-02,  1.0260e+00,  3.5139e-01, -2.5020e-01,
        -1.7955e-01, -6.2856e-01, -3.1948e-01, -3.8248e-02, -1.1396e+00,
         1.4002e+00, -7.4879e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8858e+00,  3.3481e+01,  1.3957e+00,  1.0756e+00, -1.3577e-01,
         6.3710e-01,  1.4818e-01,  1.4392e-01, -2.3927e-01, -9.5470e-02,
         5.3068e-01, -5.0757e-01, -2.7382e-01, -3.5723e-01, -1.2337e-01,
         1.9792e-01, -2.1012e-01,  1.0812e-02,  1.6618e-01, -3.1482e-01,
        -1.3029e-02,  7.1169e-02,  3.2891e-01, -8.6361e-02, -3.5983e-01,
         3.0411e-02,  2.9133e-01,  7.0055e-02,  3.0205e-01, -9.8153e-02,
         1.8211e-02,  1.1746e-01,  6.1407e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5735e-03, -3.9548e+01, -5.6475e+00,  1.1375e+00,  4.7411e-01,
        -2.2209e-01,  8.0406e-01,  1.8675e-01, -4.7157e-01,  3.8964e-01,
        -2.1933e-02, -1.2016e-01,  5.1384e-02, -2.2084e-01,  7.4392e-01,
        -6.3368e-01,  1.9118e-01,  4.9150e-02, -5.6556e-03,  2.8567e-01,
        -1.7535e-01, -1.3021e-01, -4.2674e-01, -1.8808e-01,  2.7209e-01,
        -2.8555e-01,  7.7344e-01,  1.3495e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5418, 64.7530,  5.9095,  2.8787,  0.5909, -0.3728,  0.4581, -0.5026,
        -1.6008,  0.2868, -0.9670,  0.6013,  0.0961,  0.4840, -0.6395, -0.3183,
        -0.5072, -0.3902, -0.7630,  0.2201, -0.0733,  0.1652, -0.3070, -0.5768,
        -0.6629,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.3141e+00, -5.9323e+01, -1.8228e+00,  6.1200e-01,  3.2470e-01,
        -9.7991e-01, -1.3507e+00,  5.8046e-01, -6.7194e-01, -6.1626e-02,
         3.1375e-01, -8.7058e-01, -1.9530e-01,  3.8155e-01, -7.1690e-02,
         1.8147e+00,  8.9339e-01,  3.0911e-01, -1.7566e-01, -1.0630e-02,
        -4.8620e-02,  1.4611e-01, -7.6864e-03, -3.0250e-01, -6.6572e-01,
         7.2964e-01, -2.8479e-01,  5.3719e-01,  7.2621e-01,  3.5859e-02,
         2.3463e-01,  7.4346e-01,  7.0623e-01,  2.1919e-01,  7.7193e-01,
         2.9036e-01,  3.4510e-01,  1.8927e-03,  7.7140e-02, -7.0341e-01,
        -5.8925e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2831e+00,  6.1863e+01,  4.2305e+00, -2.4409e-01,  3.4089e+00,
         2.4723e-01, -6.3460e-02,  6.9335e-01, -7.1668e-02,  2.0969e-02,
        -9.9875e-01, -4.5614e-01,  5.1093e-01, -1.3061e-01,  9.8256e-01,
         6.7134e-01, -3.1914e-01, -6.9486e-01, -2.2060e-01, -7.3593e-01,
        -5.2496e-01, -3.0091e-01, -7.9427e-02,  1.8936e-01, -4.6393e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.4349, -13.5189,  -1.1313,   1.2727,  -0.3416,  -0.0415,  -0.2222,
         -0.1912,   0.0717,   0.1408,  -0.0691,  -0.3411,   0.1816,  -0.1237,
          0.0436,   0.1038,  -0.2510,   0.0803,  -0.2352,  -0.0240,  -0.4238,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0212, 41.4591,  1.7787, -0.7349, -1.4677,  0.4284, -0.0864, -0.2959,
         0.1445, -0.3742, -1.1281, -0.7683, -0.3439, -0.7876, -0.4499, -0.9415,
        -0.5446,  1.0035,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1555e-01,  1.6586e+01,  6.3669e-01,  5.0459e-01,  2.9812e-01,
        -1.7300e-01,  3.0727e-01, -6.7882e-01, -3.5424e-01,  3.3163e-02,
         4.0700e-02,  3.3477e-01, -1.1876e-01,  6.4593e-02, -3.0794e-02,
         4.7170e-01, -3.6876e-01, -2.0486e-01,  2.2185e-01,  3.1199e-01,
        -5.9791e-02,  2.3639e-02,  7.9171e-02, -1.2587e-01,  1.6241e-01,
        -8.7636e-02, -2.4062e-01,  2.9123e-01,  1.0286e-01,  8.1527e-03,
         6.2966e-02,  7.1489e-02,  5.4862e-02,  3.4219e-02,  3.9414e-01,
         2.8186e-01,  4.4031e-04,  9.6852e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9691e+00, -1.3505e+01, -4.1844e-01,  1.1725e+00, -3.8980e-02,
        -4.3683e-02,  2.3134e-02, -1.3901e-01, -6.4936e-02, -1.0728e-01,
         1.9285e-01,  2.0887e-02, -4.4991e-02, -1.0950e-01,  3.7225e-03,
        -1.4407e-01,  2.3584e-01,  4.3496e-01, -1.7545e-01,  1.6813e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7773e+00, -4.3174e+01,  9.3105e-01,  5.3111e-01, -2.6530e+00,
        -1.1806e+00, -2.1700e-01, -2.1374e+00, -2.6188e-01,  2.0263e-01,
         1.8339e+00,  1.0166e+00, -3.3653e-03, -6.1482e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9495e-01,  3.1581e+01, -8.3165e-01, -1.7496e+00,  1.0163e+00,
         2.3093e-01, -9.8994e-01, -4.4141e-01,  3.1085e-02, -5.0815e-01,
        -1.3967e-01, -1.2366e-01, -2.5652e-01, -1.7360e-01,  8.4626e-02,
         5.5857e-01,  1.2863e-01,  3.2079e-01,  4.3774e-01,  4.2424e-01,
         2.0716e-01, -5.8992e-02, -1.1454e-01,  2.3610e-01,  4.5683e-02,
        -7.1476e-01, -5.5901e-03,  3.9483e-01,  3.4049e-01,  1.6553e-01,
         7.0979e-02,  5.6020e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3105e+00, -2.5264e+01, -5.6567e-01, -4.8102e-01,  1.3189e+00,
        -5.2449e-01, -1.9160e-01,  9.8799e-01, -1.5006e-03, -4.1585e-02,
         2.0061e-02,  4.8802e-01, -6.8065e-03, -1.5614e-01, -6.4446e-01,
         8.3969e-02,  1.9283e-01, -8.8966e-02,  6.5247e-01, -3.3887e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6908e-01,  3.1871e+00, -3.5569e-02, -1.3193e-01, -4.4360e-02,
         4.9383e-02,  1.3044e-03,  1.5393e-02, -1.8609e-02,  3.5947e-02,
        -5.5455e-02,  8.4445e-04, -3.9364e-02, -5.8683e-03,  1.5420e-03,
        -2.7663e-03, -1.4450e-02, -2.3463e-03, -4.7132e-02, -1.5285e-02,
         5.7110e-02, -7.2133e-02, -1.0016e-01,  8.4067e-03, -2.0219e-02,
        -4.3744e-02, -1.7611e-01,  1.6452e-02,  7.7863e-02,  6.9728e-03,
         2.4838e-04, -7.7488e-03,  3.7516e-02,  3.3503e-02, -1.0769e-02,
        -2.4719e-02,  2.1164e-02, -7.0051e-03, -2.3960e-02,  1.4373e-02,
        -2.5526e-02,  4.1202e-02,  2.7146e-03,  9.2451e-02,  5.2933e-03,
        -2.5899e-01, -3.1844e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5994e+00, -3.6056e+01, -3.4402e-01,  1.1331e+00, -3.5496e-01,
         2.7849e-01, -1.7972e-01,  1.7742e-01, -1.0285e+00, -3.5065e-01,
        -3.2413e-02, -1.1911e+00,  5.1203e-01, -4.5797e-01,  2.1862e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7251e+00, -4.8071e+01, -3.5701e+00,  7.7979e-02, -3.3444e-01,
        -1.7161e+00,  1.1682e-01, -1.8811e+00,  3.1214e-01,  2.7385e-01,
         5.5007e-01,  6.5168e-01,  4.4014e-01, -4.4950e-01,  7.1179e-01,
         3.1319e-01,  6.9451e-01,  2.1086e-01, -9.4616e-02, -1.1851e-01,
         2.6749e-01,  4.9958e-01,  2.1477e-01,  2.0238e-01, -8.0767e-02,
         2.6678e-02, -4.8112e-02, -1.0323e+00, -2.2691e-02,  1.7502e-01,
         9.8801e-02,  3.9726e-01, -1.0794e-01,  1.5435e-01,  3.5335e-01,
         1.8892e-01,  1.7438e-01,  1.6238e-01,  5.4981e-01,  6.6722e-02,
         3.4142e-02,  1.5998e-01,  2.1642e-01,  2.5142e-01,  1.4251e-01,
         2.4459e-01,  5.4794e-01,  4.9959e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ -3.5153, -34.2053,  -0.2621,  -0.4620,  -0.5369,   0.5672,  -0.1239,
          0.3479,   1.0962,   0.1104,  -0.3947,   0.2446,   0.1240,   0.2103,
          0.4365,   0.3146,   1.5239,   0.6189,   0.4873,  -0.1425,  -0.2973,
         -0.0530,   0.1155,  -0.8945,   0.9722,   0.9941,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4854e+00,  2.3262e+01,  7.2685e-01,  8.4862e-01, -1.7132e-01,
        -1.5807e-01, -1.4419e+00, -1.6625e-01,  8.0520e-01, -1.5496e-02,
         5.0398e-01,  1.3771e-01,  4.0379e-02, -5.7722e-02, -1.9741e-01,
        -1.6700e-01, -5.2588e-01, -4.2910e-01,  1.9386e-01,  4.3769e-01,
        -1.4331e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1192e-02,  1.2381e+01, -7.8469e-01, -3.9086e-02,  8.7925e-02,
        -4.3470e-01, -1.2100e-01, -1.6048e-01, -9.0382e-02, -1.9754e-01,
        -7.7864e-02, -5.4399e-02,  1.3746e-02, -2.8217e-01, -1.4064e-02,
         8.4334e-02,  4.0152e-02,  1.3190e-01,  2.9503e-01, -2.9303e-01,
         2.1462e-01, -1.1579e-02,  1.2991e-01,  9.4804e-02,  5.8511e-02,
        -4.3363e-02, -6.8127e-02,  2.7235e-02, -1.1669e-02, -3.1075e-02,
        -1.7815e-03, -5.3478e-02,  3.7039e-02, -5.8687e-02, -1.5465e-01,
         2.1831e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2337e+00, -4.2870e+01,  7.2354e-01,  9.6834e-01,  3.5748e-02,
         5.3771e-01,  1.7793e+00,  1.0330e+00, -3.6751e-01, -1.8032e-01,
         1.6632e+00,  6.0406e-01,  7.5591e-02,  6.4147e-01,  4.5307e-01,
         1.3959e-01,  4.6993e-01,  3.5154e-01, -6.7155e-01,  1.2784e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3046e+00,  3.4736e+01,  5.5180e-01,  6.9873e-01,  5.5097e-01,
        -2.0670e-01, -5.7342e-01,  1.7892e-01,  3.6738e-01, -6.4770e-01,
         8.1803e-01, -5.2045e-01, -4.6156e-01, -1.9821e-01,  4.3874e-01,
         7.9145e-03, -3.6298e-01, -1.2848e-01, -5.6310e-01,  1.8303e-01,
        -6.6640e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.6623, -55.7341,  -3.4043,  -0.6638,   0.4088,  -0.1365,  -1.4325,
          0.6702,   0.3340,  -0.3907,   0.3991,  -1.3336,  -0.3138,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4829e+00, -4.8438e+01, -7.7527e-01,  1.7025e+00, -2.8362e-01,
         7.3488e-02, -1.6584e-01,  2.8285e-01,  3.1699e-01,  5.7617e-01,
         3.1737e-01,  6.0387e-01, -2.9173e-02,  4.4990e-01, -1.9101e-01,
         2.7098e-03,  8.1437e-01, -1.0289e-01, -1.6091e-01, -3.8636e-01,
         1.3834e-02,  8.8929e-02,  2.1280e-01,  3.2725e-01,  3.5860e-02,
         2.2203e-01, -2.3402e-01, -6.6039e-01,  1.3786e-01, -3.3505e-01,
        -9.4722e-03, -1.5197e-01, -2.3582e-01,  3.6901e-01, -6.0422e-01,
         2.5006e+00,  1.9114e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2002e+00, -4.6022e+01, -1.3443e+00,  1.1784e+00, -5.4420e-01,
         4.9241e-01, -7.0474e-01, -1.6996e+00, -3.6929e-02, -2.1555e-01,
         1.1057e-01,  1.3547e-01, -7.0445e-01,  2.9429e-01, -3.2298e-01,
        -3.2339e-01, -9.5565e-01,  1.2402e-01,  3.7741e-01,  7.9006e-01,
        -3.7994e-01, -1.9856e-01,  1.2115e-01,  2.6485e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1558e-01,  3.3291e+00, -5.6257e-02, -4.4429e-02,  5.8895e-02,
        -4.2279e-02, -1.2446e-01, -2.0092e-01, -1.2358e-01, -1.4994e-02,
        -1.6200e-01, -6.2042e-02, -1.3885e-02, -3.3934e-02, -1.0331e-02,
        -1.3275e-02,  3.9809e-02, -1.6449e-02, -2.0946e-02,  4.6288e-02,
         7.4122e-03, -3.9977e-02, -1.3618e-02, -1.3764e-02, -6.2690e-02,
        -2.1692e-02,  7.9082e-03,  2.5211e-02, -3.2612e-03, -3.4336e-02,
        -1.0103e-01, -1.5351e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6716e+00,  2.2871e+01,  9.3893e-01, -3.3389e-02,  1.0286e-01,
         2.0424e-01,  8.9042e-02, -1.2853e+00,  2.0379e-01,  4.5264e-01,
         1.5962e-01, -1.3895e-01, -1.7707e-02, -9.6534e-04, -9.0427e-02,
        -1.0019e-01, -7.1914e-02, -2.8638e-01,  7.8814e-03,  2.9888e-01,
        -2.6790e-01,  2.1019e-02, -6.3515e-02,  2.8496e-01, -9.4294e-03,
         3.1181e-01,  1.3376e-01,  6.7423e-03, -7.5062e-02, -4.3427e-02,
         1.3110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.3339, -69.0520,   1.8712,   0.4386,  -1.0056,   0.7668,  -1.1623,
          2.1591,   0.1061,  -1.1750,  -1.2612,   0.4793,  -0.5558,  -0.1321,
          0.1687,  -0.5137,  -1.1549,  -0.3736,  -0.1865,  -0.4190,  -2.0878,
         -0.4940,   0.2373,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0746e-01,  1.7528e+01,  7.0054e-01, -3.6405e-01,  2.9997e-01,
         4.6113e-04,  3.0910e-01,  1.0489e-01,  2.4560e-01, -4.9308e-02,
         2.0445e-02, -2.5826e-01, -6.7397e-02, -3.0222e-01, -6.0101e-02,
        -1.8132e-01, -6.4342e-02, -6.5487e-02,  8.6359e-02, -1.2023e-01,
        -2.7749e-01, -3.7282e-02,  1.7145e-01, -8.8679e-02, -9.5595e-02,
         1.7465e-01,  1.0790e-01,  2.4577e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 3.8940e-01, -6.1707e+01,  1.7299e+00,  7.7429e-01,  7.9409e-01,
         2.3596e-01, -8.4645e-02,  2.8629e-01,  4.8937e-01,  6.1560e-01,
        -2.0231e-01,  8.9376e-02, -7.9599e-02,  2.1426e-01,  1.4559e-01,
         1.6187e-01, -2.1261e-01,  1.2149e-01,  9.5565e-02, -3.5240e-01,
         9.6195e-02, -2.9147e-01,  1.0060e+00,  2.0122e-01,  3.0087e-01,
         1.6730e-01, -2.0838e-01, -1.3810e+00,  2.7664e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4546e-01,  5.5983e+01, -5.1605e-01,  1.7033e+00,  4.2191e+00,
         2.4945e-01,  4.6997e-01, -1.4828e+00, -1.2107e-01,  2.1056e+00,
         5.6756e-01, -3.1691e-01, -1.8118e-02,  6.8024e-01,  4.0082e-01,
         6.5398e-01,  4.8164e-01, -4.9388e-01,  1.3395e+00, -1.1882e-01,
        -2.0267e-01,  3.1707e+00, -2.7935e+00,  1.4896e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4048e+00, -4.2823e+01, -2.9871e+00,  1.9687e+00, -1.5211e+00,
        -1.2647e+00, -6.7596e-01, -4.8132e-01, -1.7767e-01,  2.6865e-01,
        -2.8491e-01, -1.5111e+00, -8.2395e-01, -1.6067e-01,  1.2362e-01,
         4.4127e-02,  1.0985e+00,  1.1788e-01, -4.8198e-01,  1.9380e-01,
        -5.6035e-01,  1.3458e-01,  2.2351e-02,  1.8564e-01, -5.1517e-01,
        -2.0893e-01,  4.8123e-01, -1.0570e-01, -2.1536e-01, -2.2963e-01,
         2.2877e-01, -1.8592e+00, -7.7860e-01,  5.6709e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9988e-01,  1.6720e+01, -4.3559e-01, -1.0673e+00,  1.9565e-01,
         4.3052e-02,  4.0986e-01, -8.1815e-03,  3.6893e-02,  7.2775e-02,
        -1.6121e-01, -1.1767e-02,  1.1186e-02, -1.4260e-01,  2.5386e-01,
         1.2374e-01, -1.8844e-02,  4.1436e-02,  1.3613e-01,  1.5441e-02,
         4.8602e-01,  1.6838e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7958e-01, -4.3750e+01, -3.0136e+00,  1.3327e+00, -2.3218e+00,
         5.1298e-01,  1.0969e+00, -4.0164e-02, -1.0571e-01, -5.3532e-01,
        -1.0301e+00, -2.7593e-01,  2.7217e-01,  1.0967e-02, -2.9643e-02,
         2.1250e-01, -5.4131e-01,  1.3553e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7239e-01,  6.2743e+01,  3.4106e-01, -1.7634e-01, -1.8413e-01,
         1.0427e-01,  6.3758e-01, -1.2882e+00, -2.9448e-01,  4.7490e-01,
         3.8315e-01,  5.1456e-01, -3.5436e-01, -3.9977e-02, -2.1426e-02,
        -1.0977e-01, -2.3347e-01, -7.6272e-01, -3.9628e-01, -9.9746e-01,
        -1.4794e+00, -4.2872e-01, -2.5238e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0863e-01, -3.4289e+01,  9.6619e-01, -5.1176e-02,  1.0937e+00,
         3.5044e-01, -4.4506e-01, -1.6760e-02,  4.5321e-01,  2.9676e-02,
        -8.8907e-02, -1.5647e-01,  1.1731e-01, -3.0730e-01, -2.6613e-01,
         8.9888e-01, -4.3847e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0099e+00,  4.7788e+01,  1.9335e+00, -3.3350e-02,  1.1196e+00,
         6.2442e-01,  1.8888e+00, -2.1394e-01, -2.8570e-01, -3.4362e-01,
         9.6961e-01, -2.2397e-01, -4.0664e-01,  9.8874e-01,  2.8622e-01,
        -3.1187e-01, -1.2762e-01, -1.4895e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1118e+00, -2.0851e+01, -8.3501e-01,  3.9038e-01,  1.7412e-01,
         1.8719e-01,  1.4960e-01, -4.3522e-01, -3.6885e-02, -7.5086e-01,
        -5.6377e-01,  3.9596e-01, -2.4251e-01,  2.4140e-01,  2.3713e-01,
         3.0392e-03,  2.2924e-02,  2.8762e-01, -2.9688e-01, -1.1986e-01,
        -1.3215e-01,  6.5800e-02, -2.9082e-01,  1.0203e-01,  5.6594e-01,
         4.7465e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6669e+00, -5.8150e+01, -3.2194e+00, -6.8163e-01, -1.4580e+00,
         1.3418e+00,  8.0170e-01, -2.6553e-01, -1.5861e+00,  2.6785e+00,
         6.2780e-01,  3.7904e-01,  4.7047e-01,  4.9023e-01,  1.2157e+00,
        -6.2550e-02,  6.8487e-02, -7.7228e-02,  5.7254e-01,  1.1320e+00,
         7.3364e-02,  4.4602e-02, -9.9245e-01,  3.1806e-01, -2.9489e-01,
         2.5345e-02,  4.2235e-01,  4.0240e-01,  8.1900e-01, -1.3386e+00,
        -2.4901e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.6294, -38.9841,   0.3151,   0.2966,   1.4102,   0.4003,  -0.4055,
         -1.1864,  -0.4630,  -0.8759,   0.7154,   1.2297,   0.3535,   0.4741,
          0.7387,   0.3296,  -0.0947,   0.1939,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7609, 35.1061, -1.6703,  0.3978, -2.6455,  3.6920,  1.9905, -0.8729,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.1048e+00, -4.6569e+01, -3.7387e-01,  4.0022e-01, -1.2977e-01,
        -1.9818e+00,  3.2867e-01,  3.0093e-01,  1.3894e+00,  5.2175e-02,
         2.3656e-01,  6.0860e-01, -3.8643e-01, -6.7720e-02, -3.0105e-01,
         3.1636e-01, -2.1098e-01, -1.6715e-01, -8.1272e-02, -1.0566e-01,
        -3.3175e-02,  1.4288e-01,  3.5798e-01, -1.7611e-01,  2.7140e-02,
        -1.3353e-01,  2.4447e-01, -4.7620e-02,  7.0956e-01,  8.5459e-02,
        -1.1386e-01, -8.0050e-02,  1.3239e-01,  1.9279e-01, -1.5339e-01,
         5.2841e-01, -8.5125e-01, -3.7711e-01, -6.2347e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1875e-01,  1.9384e+01, -6.0443e-01,  9.3653e-03,  9.6223e-01,
         3.4973e-01, -5.2864e-01,  1.1437e-01, -1.3335e-01, -3.1213e-01,
        -1.5921e-01, -1.6518e-01,  1.3024e-01, -2.7669e-01,  1.2529e-01,
         3.5168e-01, -6.6735e-02, -2.6889e-01, -3.6930e-01,  1.5990e-01,
        -2.0968e-02,  6.5957e-01,  1.7467e-01, -1.9645e-01, -5.1851e-02,
        -1.5770e-01, -8.2234e-01, -2.4631e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.4235, -19.3396,  -0.3408,  -0.7789,   0.1492,  -0.0848,  -0.4156,
         -0.2988,  -0.5615,   0.2172,  -0.6868,  -0.2392,  -0.2646,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9239, 19.4835, -0.1157,  0.7290,  0.3254, -0.2730, -1.2115, -0.1493,
        -0.1156, -0.5025, -0.0635, -1.1238, -0.4069,  1.3926, -0.3283,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6586e-01,  5.2554e+00, -2.9702e-01, -2.0632e-01,  8.9988e-02,
         1.0233e-01, -1.2324e-01, -8.6286e-02, -9.7588e-02, -1.2335e-01,
        -9.7410e-02, -5.7461e-02,  1.1623e-01,  5.3839e-02,  4.8412e-02,
         9.3451e-02, -1.6267e-02,  4.2291e-02, -3.9694e-02, -2.4884e-01,
         1.1876e-01, -1.9072e-02, -3.8176e-02,  5.5544e-04,  1.6795e-02,
        -6.0354e-03, -6.4989e-03,  8.9079e-03, -7.7842e-02,  1.3311e-02,
        -1.5050e-01, -5.3377e-02, -6.6143e-02,  1.8914e-02,  5.5074e-02,
        -5.2337e-02,  2.7498e-02,  2.9839e-02, -3.2331e-02,  2.1869e-02,
        -1.7618e-02, -3.8455e-02,  9.3135e-03,  1.0511e-03,  4.0766e-02,
         3.4377e-02, -7.1677e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6616e+00,  3.4856e+01,  1.2374e+00, -4.1624e-01, -9.6453e-01,
         5.2799e-01, -7.2165e-01,  4.9264e-02,  1.1083e+00, -2.4250e-01,
        -3.9776e-01, -1.4197e-01,  3.1753e-01,  2.6733e-01,  1.2562e-01,
        -1.3044e-01,  2.2025e-01,  4.9038e-01,  5.9867e-02, -1.1663e-02,
         2.0644e-01,  1.7360e-02,  3.4899e-01,  6.5909e-01,  5.3558e-01,
        -1.1534e-01,  4.3262e-01, -4.3451e-02,  4.0406e-02,  1.8561e-01,
         2.9246e-01, -1.4160e-01,  1.9178e-02,  3.9107e-01, -2.1110e-02,
         5.0787e-02,  1.0258e-01,  1.5651e-01,  2.5963e-02, -9.5483e-02,
        -7.1172e-02, -1.0011e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6013e+00,  3.2847e+01,  1.5042e+00,  4.2163e-01,  1.0587e-01,
         7.5256e-01,  5.8481e-01, -1.7221e-01,  4.9855e-01,  8.3690e-01,
         4.3896e-01,  9.0336e-01, -6.3854e-01, -1.7650e-01,  6.2332e-01,
        -7.0675e-01,  9.5214e-01,  2.5744e-02,  1.8247e-01,  3.5481e-01,
        -2.9518e-01, -6.3967e-02, -1.9977e-01, -5.9103e-01, -1.7228e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3889e+00,  2.4687e+01,  9.0981e-02,  3.1733e-01, -4.4668e-01,
         5.7474e-01,  1.6629e-02, -5.9839e-02, -2.5099e-01, -2.8982e-02,
         6.4777e-02, -3.3626e-02,  2.1025e-01, -4.8471e-01, -3.8253e-01,
        -3.0214e-01,  1.7919e-02, -1.0624e-01,  1.5492e-01,  4.8019e-01,
         2.7800e-01,  1.7331e-01, -1.0473e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2300e-01,  2.0832e+01,  2.4062e+00, -8.9220e-03, -2.1784e-01,
        -1.5232e-01, -3.7266e-01,  1.5131e-01, -3.9061e-01,  1.3414e-01,
         1.1945e-01, -1.4056e-01,  1.0566e-01, -3.3177e-01,  1.2443e-01,
        -1.2674e-01, -2.5536e-01, -1.4825e-01,  3.8050e-02, -6.0426e-02,
        -1.1761e-02, -3.3630e-01, -4.2220e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6192e+00,  4.2207e+01, -9.8756e-01, -4.4253e+00, -2.4074e+00,
         7.6668e-01,  8.2274e-01,  2.8091e+00, -1.9667e-02,  2.0470e+00,
         2.4187e-01, -8.5488e-01,  1.5475e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1195, 62.8774, -0.6488,  0.2779, -0.6714,  0.0832, -0.1988,  1.3144,
        -0.1273,  1.7531, -0.2887, -1.3023, -1.2238,  0.1875,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5019e+00, -5.1026e+01, -4.4889e-01,  9.6398e-01,  3.4384e-01,
         1.4461e-02,  2.1992e-02,  4.2810e-01, -1.0593e-01,  1.0570e+00,
         2.2187e-01,  5.6469e-02,  2.2697e-02,  6.8788e-01,  6.3219e-01,
         5.4732e-01,  8.1849e-03,  1.4328e-01,  8.1336e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.5590e+00,  2.2341e+01, -1.8034e+00, -1.9346e+00, -2.4821e-01,
         5.5935e-01, -9.0458e-03, -1.7594e-01, -3.3913e-01,  2.3857e-01,
        -4.9182e-02,  7.7158e-02,  2.6917e-01,  3.6527e-01, -9.0230e-01,
        -4.7892e-01, -4.3287e-02,  3.4374e-02,  1.5508e-01,  1.7785e-01,
        -1.5909e-01, -3.8375e-01,  1.4221e-01,  5.5399e-01, -8.9604e-01,
        -9.8587e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.8726, -34.6852,   0.0392,   0.6439,   1.4831,   0.9862,   0.1482,
          0.7168,  -0.4620,   0.3640,   0.7044,   0.2055,  -0.7074,   0.3923,
          0.8714,  -0.9230,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.5185, -22.9369,   0.2125,   0.2248,   0.0286,  -0.7414,  -0.3577,
         -0.0983,  -1.2754,   0.3786,   0.1778,   0.3116,  -0.0958,  -0.4687,
         -0.2696,   0.2075,  -0.8642,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3416e+00,  2.5760e+01,  2.2522e-02,  4.9361e-01,  2.2546e+00,
        -9.8505e-02,  9.5700e-02,  5.3719e-01, -6.0249e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1722e+00,  2.0912e+01,  5.0414e-01,  1.8523e-01,  1.2951e+00,
         3.0854e-01,  7.3834e-01, -2.0800e-01, -4.4461e-01,  1.9150e-02,
         2.3910e-01,  1.9283e+00, -2.3514e-01, -7.2971e-02,  7.2795e-02,
         2.3412e-01, -9.6522e-02,  2.3282e-01,  9.4115e-02,  2.0475e-01,
         2.0449e-01,  3.1375e-01, -8.4726e-01, -2.2788e-01,  4.1906e-01,
        -4.0681e-01, -4.2090e-01, -1.5581e-01,  1.3934e-01, -1.0774e-01,
        -3.0613e-01,  1.1151e-01, -2.9458e-02,  3.6313e-02, -9.4028e-02,
        -2.7689e-01, -1.5842e-01,  1.9388e-01, -3.9189e-01, -9.8144e-02,
        -3.6872e-01, -1.5739e+00, -2.9191e-01,  6.6254e-01, -8.7935e-01,
         3.1354e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5459e-01,  2.3505e+01,  6.9116e-01,  1.0309e+00,  2.5005e-01,
        -1.6220e-01, -1.8678e-01,  6.3992e-02, -3.6659e-01, -1.7460e-01,
        -2.3404e-01, -1.1605e-01, -3.5970e-01, -3.5922e-02,  6.6265e-02,
         2.2340e-01,  8.4396e-02, -1.2507e-01,  1.0408e-01,  4.6833e-02,
        -5.2869e-03, -1.1077e-01,  2.1496e-01,  3.5565e-02, -1.4559e-01,
        -1.6380e-01, -1.0082e-01, -5.6431e-02, -3.3637e-01,  7.5746e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4739e+00, -1.8956e+01,  1.7535e-02, -5.5704e-02,  2.5737e-01,
        -1.3125e+00, -3.3438e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3755e+00,  3.0943e+01, -3.3004e-01, -9.3542e-01, -7.7985e-03,
        -3.9710e-01,  1.6770e-01, -1.4175e-01, -1.3989e-01,  2.8258e-01,
         1.8068e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2311e-03,  3.8679e+01,  3.8026e+00,  4.1425e+00, -3.5842e-01,
        -2.0138e+00,  5.7402e-01,  2.5300e+00,  3.2497e-01, -1.2891e-01,
        -2.2121e-01,  4.7894e-01,  9.6246e-01, -1.9990e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3342e-01, -4.2497e+01, -7.1819e-02, -4.4471e-01, -1.1140e-01,
        -3.9318e-01,  3.8889e-01,  3.8395e-01,  5.8925e-01,  2.3982e-01,
        -6.7921e-02,  6.4001e-02,  1.9426e-01, -2.8430e-01, -2.7080e-01,
        -2.7220e-01,  1.2913e-03,  2.6873e-01, -5.3221e-03,  2.1217e-01,
        -8.3834e-01, -7.2937e-01,  2.1607e-01, -2.3765e-01, -7.2028e-02,
         6.2180e-02,  5.8550e-02,  2.4100e-01,  2.0709e-01, -8.5299e-02,
         4.4138e-01, -1.2126e+00, -2.1657e-01, -1.5515e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8958e-01,  1.1912e+01,  1.1079e+00, -1.1308e-01,  2.8539e-01,
         2.4652e-01,  1.1075e-01, -4.8690e-02, -1.6446e-01,  3.6783e-01,
         4.1467e-02, -7.3034e-02, -6.2747e-02,  1.3817e-01,  2.8973e-01,
        -9.3102e-02, -9.5005e-03, -1.5252e-01, -5.8489e-02,  3.3286e-02,
         8.3110e-02,  1.6836e-01, -9.5040e-02, -4.2679e-02,  9.9174e-02,
         1.7516e-01,  2.3795e-01,  3.8458e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7227e+00,  1.9287e+01, -3.5725e-01, -7.7503e-01, -7.4495e-01,
        -3.5754e-01,  1.6207e-01, -8.6325e-02, -8.4813e-03,  6.6160e-02,
        -5.7120e-03,  2.4369e-01, -4.0101e-02,  7.9870e-01,  3.5061e-01,
         1.7526e-01,  3.1401e-01, -2.3201e-01,  4.3520e-01,  9.8355e-02,
        -7.7767e-02,  5.6536e-02, -5.5443e-01,  4.7737e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.4485e+00,  2.6483e+01, -2.7216e+00,  2.2676e-01,  1.5098e-01,
        -5.0108e-01,  8.2008e-01,  3.9696e-01, -2.0971e-01,  2.0956e-01,
        -7.6308e-01,  4.1416e-01,  8.3848e-01,  3.2455e-01,  2.7896e-01,
         3.4959e-02,  4.2842e-02, -1.4199e-01,  2.5376e-01,  2.3326e-01,
         2.3590e-01,  3.1974e-01, -9.1907e-02,  8.1344e-02,  3.0104e-01,
         7.9109e-01, -1.7942e-01, -1.2682e-02, -9.4243e-02, -1.5833e+00,
        -2.3992e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2281e-01,  2.7447e+01, -1.8546e+00, -1.9979e+00,  2.0474e-01,
         3.7425e-01, -2.2846e-01, -3.5108e-03,  1.4472e-01, -1.6816e-01,
         6.3109e-01, -1.4587e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.1643, -52.3146,   0.9993,   3.4112,   0.0722,  -1.1389,   1.6765,
          0.7330,   0.3822,  -0.2756,  -2.7042,   1.9156,  -1.1349,  -0.2572,
         -0.7139,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4457e+00,  1.8175e+01, -4.5416e-01, -6.1771e-01,  4.0089e-01,
        -2.5667e-01,  2.2759e-01, -1.5256e-01, -1.3557e-01,  1.8157e-01,
        -8.9090e-02,  3.3618e-01,  1.9594e-01, -1.0538e-02, -1.1748e-01,
         1.0126e-02,  1.4158e-01, -1.8635e-02,  1.8180e-01, -2.0690e-01,
         8.5015e-02, -2.5424e-02, -1.6590e-01,  1.6820e-02,  1.8871e-01,
        -1.1198e-01, -2.7348e-02, -4.6529e-02, -1.5245e-01,  1.3737e-02,
         2.5373e-01, -4.8508e-02,  3.1367e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8035e-01, -1.9916e+01,  1.3963e+00,  7.5664e-01, -1.8143e-01,
         6.9402e-02,  2.4376e-01, -2.4922e-01,  1.1764e-01,  1.2694e-01,
         7.2511e-01, -1.0425e-01, -3.2410e-03,  1.0272e+00, -1.1613e-02,
         3.8548e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1330,  4.6161, -0.7472,  0.0419,  0.2204,  0.0616, -0.0595, -0.0671,
        -0.0255,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8141, 22.1909,  0.3912, -1.0565,  1.4782,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2828, 49.0328,  0.9209, -0.5253, -0.5227, -1.0194, -0.3500, -1.0852,
        -0.2994,  0.4354,  0.5477,  0.3729, -0.9598, -0.7433,  0.2283,  0.2788,
         0.2616,  0.4577,  0.8559, -0.1811, -0.1094, -0.1539,  0.1896, -0.4176,
        -0.1506, -0.5337,  0.6711,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9435, 51.4685,  2.1268,  0.4385,  3.2943,  2.2468, -4.5514,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6761e-01,  5.9678e+00, -2.7504e-01, -8.4109e-02, -2.1500e-02,
         8.8193e-03,  2.6556e-01,  3.6773e-02, -5.5897e-02, -1.6132e-01,
         9.8032e-02,  4.3922e-02, -5.6890e-02, -3.9443e-02,  2.6158e-03,
        -7.2315e-03, -1.6956e-01, -3.2980e-01,  9.6711e-02,  6.7769e-02,
        -9.9969e-02,  1.2632e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9839e-01, -4.5688e+01,  2.5999e-01,  6.2685e-01, -1.8459e+00,
         3.3149e-01,  5.6838e-01, -9.9150e-02,  9.9707e-02,  1.3127e-01,
         1.8092e-02,  6.7753e-02,  1.0953e+00,  4.0580e-03, -7.7497e-02,
         2.7369e-03, -7.0373e-02, -6.0185e-01, -1.1276e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1744,  3.3492,  0.0368, -0.0245,  0.0161,  0.0094, -0.0274,  0.1399,
         0.0646, -0.0067,  0.0145,  0.0126, -0.0079, -0.0108,  0.0421,  0.0364,
        -0.0276,  0.0249,  0.0098,  0.0102,  0.0311,  0.0262,  0.0080,  0.0365,
         0.0276,  0.0537, -0.0056, -0.0142,  0.1117,  0.0267,  0.1231,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.3311e+00,  4.2893e+01,  1.1639e+00, -5.7599e-01,  1.9665e+00,
        -5.0224e-01, -6.0358e-02,  5.2162e-01, -5.9511e-01,  1.1812e-01,
        -2.0038e-02,  1.2331e-01, -1.1626e-01, -3.9362e-01,  1.1302e+00,
        -6.5225e-01, -4.8868e-01, -1.2322e+00, -1.5398e+00,  2.2455e-01,
        -4.5058e-01, -6.2235e-01,  1.2300e-01, -6.9729e-01,  1.6262e-01,
        -2.9758e-02, -1.8389e-01, -1.3275e+00, -1.1899e-01, -8.5386e-02,
        -9.7572e-02, -1.7768e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0591e+00,  5.3390e+01, -1.4311e+00, -9.1550e-01,  1.7532e+00,
         4.8635e-01, -1.7160e+00,  1.1130e+00,  8.9399e-01, -6.0500e-02,
        -8.9698e-01,  1.9366e-01,  2.3991e-02,  3.4336e-01, -1.9457e-01,
        -3.4924e-01,  3.1609e-03, -1.9044e-01, -7.9993e-02,  2.4080e-01,
         1.6888e+00, -2.9306e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5511,  6.9245,  0.2033, -0.0594, -0.0127, -0.0298, -0.0664, -0.1186,
        -0.1533, -0.0411, -0.0983, -0.0370,  0.0092, -0.0789, -0.0520, -0.0287,
         0.0133,  0.0731, -0.0656, -0.1219,  0.0313, -0.0663,  0.0216, -0.1163,
        -0.0598, -0.0316, -0.0547, -0.1574,  0.3413, -0.0657,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3697e-02,  3.8494e+00,  1.9531e-01, -2.0204e-01, -5.3856e-02,
         1.2415e-02, -9.5450e-02, -2.1151e-02, -4.8045e-02, -9.7915e-02,
        -9.2260e-02,  3.4322e-02, -6.5127e-02,  6.4061e-02, -5.9595e-02,
         1.0271e-01,  2.1861e-02, -7.6181e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5811e+00, -3.7233e+01, -2.7823e+00, -3.9150e-01, -6.1099e-01,
        -5.9043e-01, -1.2643e-01, -6.3467e-02, -4.7690e-03,  6.8585e-02,
         3.2299e-01,  6.7462e-02,  9.1487e-03,  1.9307e-03,  4.7721e-01,
         1.1610e+00,  9.3101e-01,  2.9802e-01, -3.6719e-01,  4.2915e-01,
         2.5240e-01,  1.6706e-01, -5.5966e-02,  5.7568e-02, -2.6936e-01,
         6.1717e-02, -5.4276e-01, -4.8573e-02,  2.4432e-01, -2.6555e-01,
        -6.1372e-02, -1.9948e-01,  1.0385e-01, -6.7780e-02, -1.0620e-01,
         1.1280e+00,  8.1305e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0221e+01,  6.7035e+01, -1.3885e+01, -3.7056e-02,  2.3391e+00,
         2.2122e-01, -4.6859e-01, -2.8594e-01,  4.9660e-01,  6.2965e-01,
         1.4834e-01,  1.3311e+00, -1.0934e-02,  1.3598e+00,  2.6856e-01,
        -2.4277e-01, -1.7929e+00,  3.4197e-01, -6.7905e-01, -4.4239e-01,
        -7.0045e-02, -2.1840e+00,  4.5599e-01, -3.4183e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4951e-01,  1.0215e+01,  1.3474e-01, -1.3011e-01,  6.3493e-01,
         5.4377e-02, -2.5500e-02,  5.3855e-02,  1.1080e-01,  1.7806e-02,
        -3.9922e-02,  6.4236e-02, -6.9338e-02,  4.3859e-02,  1.7051e-02,
         1.6881e-02, -2.1104e-02, -8.0537e-02, -1.2053e-02, -1.0461e-01,
        -2.7917e-02,  8.1943e-02, -6.4612e-03,  2.4975e-02,  1.5520e-01,
         1.2198e-01,  1.1681e-02,  9.8022e-02,  1.6541e-01, -8.8788e-02,
         7.3128e-02,  1.8223e-01,  2.8778e-02,  3.4065e-02,  8.1595e-02,
        -1.1294e-01, -1.1916e-02,  6.8082e-02,  4.2669e-02,  5.7696e-02,
        -4.1051e-03, -6.2126e-02, -3.0532e-02,  3.9653e-02, -1.4857e-01,
         1.1961e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7850e-01,  2.1733e+01,  2.3660e-01, -6.2041e-02, -8.0701e-01,
         4.0076e-02, -8.3935e-01,  1.3353e-03,  4.8850e-02, -4.1685e-01,
        -4.0358e-01, -1.8537e-01, -1.3118e-01, -6.7217e-02,  8.2227e-02,
         1.8190e-01, -1.0026e-01,  3.8777e-01,  1.1099e-01, -8.6955e-02,
         4.7848e-01,  1.5476e-02,  4.1896e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -6.8734, -46.6801,   1.4331,   1.8036,   1.6268,  -0.7529,  -1.8001,
         -0.1267,   1.1796,   1.7845,   0.2592,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3978, 40.8465,  3.2261, -0.8771,  1.1867, -0.8960, -0.9321, -0.8227,
         0.0942, -0.0973,  0.1035,  0.0603,  0.1894, -0.9913,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6209e+00,  3.5437e+01,  1.6066e+00, -7.7229e-01, -4.2697e-03,
         3.0615e-01, -7.9680e-01, -3.9029e-01,  1.5634e+00, -4.5438e-01,
         2.5436e+00,  1.3017e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6189e-01,  4.8888e+01,  3.0511e+00,  1.3289e+00,  3.3521e-01,
        -1.2560e+00,  1.9248e-03, -2.4546e-01,  8.0196e-02,  4.4272e-02,
        -5.1661e-03, -2.4300e-01,  2.0154e-02,  2.2688e-01, -1.6714e-01,
        -2.6906e-01, -1.7780e-01, -2.5189e-01,  4.8980e-02, -5.4094e-01,
        -2.8555e-01,  6.8173e-02,  1.8937e-01, -1.0539e-01,  2.4076e-01,
         7.7695e-02,  2.4118e+00, -4.2080e-01, -3.5862e-01, -3.1918e-01,
        -2.4059e-01,  1.7280e-01, -2.2129e-01,  6.8644e-01,  9.3597e-02,
        -2.4050e-01,  2.3235e-01, -3.0176e-02, -8.0769e-02, -1.6567e-01,
         1.8519e-01,  6.8702e-02,  3.0331e-01, -4.6683e-02, -1.6489e-01,
         9.7581e-03, -1.6351e-01, -1.7417e-01, -1.6665e-01, -1.0973e-01,
        -6.2831e-03,  2.8177e-01, -2.4162e-01,  1.5458e-01,  5.7432e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.5863, -7.4412,  0.3564, -0.0213,  0.3227,  0.2359,  0.0506,  0.0584,
         0.0621, -0.1075,  0.0523, -0.0246, -0.2336, -0.3362, -0.0839, -0.0514,
         0.2241, -0.2807,  0.0520, -0.1653, -0.0358, -0.2031,  0.0242,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9644, 42.0748,  4.0692,  1.9786, -0.2017,  0.6623, -0.6681,  0.3156,
         0.3680, -0.5031, -0.1233,  0.2336, -0.4445, -0.4503, -0.2520, -0.3079,
         0.9126,  1.2438,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2967e+00, -2.3785e+01, -4.3228e-01, -2.1181e-01, -2.0198e+00,
        -1.3696e-01, -1.0961e+00, -7.4781e-01, -6.8009e-02, -2.7334e-01,
         5.4815e-01,  1.5415e-01, -1.7230e+00, -3.0010e-01,  6.5232e-01,
        -4.8808e-03,  3.8199e-01, -5.5428e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3358e+00,  5.4694e+01, -2.1967e+00,  4.5255e-01,  1.2722e+00,
        -2.3999e+00,  1.0120e+00,  1.0601e+00, -7.9221e-01,  2.3998e-02,
        -5.1028e-01, -2.6195e-01,  7.7484e-02, -1.4460e+00, -4.7348e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4030e+00,  1.8819e+01,  2.2241e-01,  2.7855e-01, -1.3763e-01,
         8.6197e-02,  2.2144e-02,  4.7707e-01,  4.9745e-01,  1.5983e-01,
         8.2623e-02,  2.8920e-02, -9.0439e-03,  1.2985e-01, -2.6043e-01,
         2.6317e-01, -1.6212e-01,  8.4794e-03, -2.5301e-01,  5.9349e-01,
         8.8635e-02, -1.5160e-01, -6.8184e-02,  1.4973e-02, -4.7863e-01,
         1.0180e-01,  4.5208e-01, -6.7914e-02, -6.7633e-02,  5.5037e-02,
        -4.4858e-02, -3.6984e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9586e-02,  6.3015e+00, -3.6969e-01, -2.4237e-02, -1.4157e-01,
         4.2352e-02, -4.4105e-03,  7.0677e-02, -4.5745e-02, -3.8023e-02,
         2.1788e-02, -5.7443e-02, -1.0804e-01, -2.9759e-02, -2.5699e-02,
        -4.0175e-02, -5.9886e-02, -3.3033e-02, -1.8116e-02, -1.4261e-02,
        -1.5019e-02, -3.4377e-02, -1.1426e-01, -1.1003e-03,  4.1647e-02,
         2.2821e-02,  1.0804e-02, -1.8366e-02,  8.3741e-03, -1.8418e-02,
         4.3081e-02,  9.0158e-02,  2.7052e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9889e+00,  4.1255e+01,  1.0821e+00,  4.1213e-01,  9.3380e-01,
        -9.4581e-02,  3.5304e-01,  5.7907e-01, -2.0249e-03,  9.1099e-01,
         1.5579e-02, -2.3627e-01,  2.3874e-01,  4.6389e-01,  4.8345e-01,
         9.5216e-01, -9.2349e-02, -1.3906e-01,  2.0018e-02, -8.9277e-02,
        -1.8210e-01, -5.2393e-02, -7.6713e-02, -1.0188e-01,  4.1704e-01,
         2.8047e-02,  3.1238e-02,  3.8389e-02,  5.3110e-01, -4.9040e-02,
         4.6862e-01,  1.8490e-01, -1.4252e-01,  1.9058e-01,  1.7336e-01,
        -9.8497e-02,  4.3269e-02, -1.3397e-01, -1.4010e-01,  2.9729e-02,
        -2.3358e-02, -3.2482e-02,  1.3799e-03,  9.2854e-03,  1.7898e-01,
         2.3482e-01,  2.0119e-01,  1.6209e-01,  1.6799e-01,  5.8742e-02,
        -1.0569e-01,  1.3476e-01,  3.3835e-01,  3.2583e-01,  2.2675e-01,
         4.3875e-01,  4.5179e-01, -5.9657e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2247e+00,  2.1125e+01,  1.3749e+00,  2.9895e-01,  1.6339e-01,
         3.6955e-01, -1.6005e-01, -4.9298e-02,  4.2356e-02,  7.6746e-01,
         3.2832e-02,  4.5338e-01,  1.4438e-01, -3.9332e-01,  2.0545e-02,
         2.8368e-01, -2.7313e-02, -1.5818e-01, -6.6845e-02,  9.3985e-02,
         9.1032e-02,  1.1683e-02, -3.4632e-02, -2.0047e-01, -7.8902e-01,
         2.9488e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8739e+00,  4.2418e+01, -1.1858e+00, -3.2261e+00, -1.7055e+00,
         2.0232e+00, -5.1606e-01, -3.2986e-01, -5.1960e-01,  2.9260e-01,
         4.4552e-01, -8.3875e-02,  1.4320e-01,  3.3017e-03,  3.4253e-02,
         8.2962e-03, -8.2987e-02, -4.8897e-01,  2.7362e-01,  2.1573e-01,
        -2.9012e-02, -7.3825e-01, -7.9175e-01, -7.5374e-02, -9.4184e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7018e-01,  3.4718e+01,  2.3679e+00, -1.6722e+00, -5.4289e-01,
        -7.8133e-01, -6.4364e-01, -1.2560e+00,  4.7096e-01,  5.1043e-01,
        -1.7757e-01, -5.3372e-01,  5.7921e-01,  3.6899e-01, -7.4266e-02,
         6.0554e-02, -4.1311e-01,  1.7714e-03, -1.2088e-02, -2.0387e-01,
         1.8587e-02,  4.1274e-01, -7.8772e-02, -3.2230e-02,  4.7844e-01,
         4.0941e-01, -2.5974e-01, -1.4923e-01,  2.7506e-01,  5.4148e-01,
        -1.2344e-01, -4.9116e-01,  5.5898e-01,  1.7030e-01,  4.3833e-01,
         4.3803e-02,  1.7677e-01, -3.7724e-02, -1.4812e-01, -3.3942e-02,
        -4.0023e-01, -1.3654e-01,  1.0023e-01, -4.2964e-02, -1.5050e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1985e-02,  1.0281e+01,  2.4123e-02,  5.9087e-01, -2.1538e-01,
        -1.4708e-01, -2.2221e-01,  1.5648e-01,  2.4632e-04, -1.5240e-01,
        -1.2286e-01,  1.2000e-01, -1.4671e-01,  1.2277e-01, -2.1713e-02,
         2.6700e-01,  4.5893e-02, -1.6485e-01, -3.2449e-02,  5.9640e-02,
        -9.7422e-02, -4.9957e-01, -5.1664e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5075e-01,  2.4196e+00, -1.6817e-01, -3.3375e-01, -3.1233e-02,
         1.2041e-01, -1.7968e-01, -1.9643e-02, -3.5409e-02, -6.0308e-02,
         1.2966e-02,  1.7681e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-5.6890e-01,  1.5180e+01,  1.4961e+00,  1.8488e-02, -1.4798e-01,
         3.2892e-01, -1.7314e-01, -2.5160e-02,  2.3701e-01, -3.1657e-01,
        -5.6332e-01, -7.0888e-03, -4.8712e-01,  8.9210e-02, -8.4599e-02,
        -9.5594e-02, -5.8178e-02, -3.5912e-01, -1.0963e-01, -4.8314e-01,
        -4.8977e-01, -4.6096e-04,  1.0937e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9736e+00,  3.9274e+01,  9.5009e-01, -1.0275e+00, -4.8357e-02,
        -4.0782e-01,  1.7983e-01, -1.2913e-01, -6.7539e-01,  4.5986e-01,
         3.3687e-01, -2.1278e-01,  6.9137e-02,  1.4792e-01, -3.3181e-01,
        -2.1671e-01, -4.5258e-01, -9.9820e-01, -6.1589e-01,  1.9415e-01,
         4.9860e-01, -3.4401e-01, -4.8622e-01,  6.8276e-02, -2.3136e-01,
         9.9038e-03, -1.4603e-01,  1.5560e-01, -3.8927e-01, -2.4641e-02,
        -2.1868e-01,  6.4249e-01,  1.1613e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.8679e-01, -9.0255e+01, -3.4522e+00,  1.3754e+00,  1.5693e+00,
         1.7195e+00,  9.8685e-01,  7.6593e-01, -1.2912e+00,  1.3558e-01,
        -9.7380e-01, -1.2910e+00,  1.3560e+00, -5.0906e-01, -2.1916e+00,
         8.0089e-01,  1.1260e+00,  1.2273e+00, -3.4986e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5033, 47.1738, -1.2765, -2.4071, -0.2671,  0.4014,  0.9476, -0.4531,
        -0.4224,  1.4139, -0.5804,  0.6623,  0.5514,  2.8535,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1379e+00,  6.6546e+01,  3.1839e+00, -9.6154e-01, -7.3004e-01,
        -4.7997e-01,  6.3809e-01,  4.2931e-01,  1.3424e+00,  3.5935e-02,
         2.8602e-01, -1.0918e-01, -7.0730e-02, -1.1597e+00,  1.1743e-01,
         2.6976e-01, -2.1219e-01,  3.9990e-02, -2.1030e-01, -2.8109e-01,
        -1.9925e-01, -6.1669e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7135e+00,  3.5774e+01, -5.9698e-01, -5.5312e-01, -1.2763e-01,
         7.1857e-01,  1.0763e-02,  3.5598e-01, -4.1383e-01, -5.6107e-01,
         2.6088e-02, -7.0735e-01,  3.9139e-01, -4.4388e-02, -1.3043e-01,
        -1.9187e-01, -3.1623e-01, -5.0604e-01,  7.4931e-02,  3.5803e-01,
         6.9278e-02,  1.8383e-01, -4.2595e-01, -3.6110e-01, -4.3553e-01,
        -1.0308e-01,  1.9207e-01, -4.4827e-02, -1.9235e-01, -7.0642e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3719,  1.8284,  0.1020, -0.1695, -0.0533,  0.1497,  0.0630, -0.1033,
         0.0224,  0.0087, -0.0781,  0.0479, -0.0285,  0.0497,  0.0391,  0.0338,
         0.1504,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7968, 47.9728,  2.2576, -0.6601, -0.0934,  0.9070, -1.8162,  1.0173,
        -0.4103, -0.7000, -0.3529,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4316e+00,  5.2284e+01,  5.0056e-01,  1.3927e+00, -1.0863e+00,
        -2.3358e+00, -2.6072e-01,  3.2134e-01,  3.6426e-02,  8.2909e-01,
        -1.1486e-02, -3.1535e-01, -6.1135e-01,  6.8408e-01,  1.7946e+00,
        -5.1884e-01, -4.9728e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2423e+00, -6.0890e+01, -1.9884e-01, -3.8711e+00, -2.3115e+00,
        -1.2680e+00, -1.2161e-02, -4.5034e-01, -1.2844e-01,  4.6065e-01,
         2.4362e-02,  1.4863e-01, -7.7483e-04,  5.5335e-01,  1.6036e-01,
        -3.3991e-01,  6.8033e-02, -4.6790e-01,  4.2407e-01,  3.9891e-01,
        -3.9603e-01,  1.3119e-01, -8.0876e-01,  5.5518e-03, -3.3226e+00,
         3.5289e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7205e+00,  5.3262e+01,  3.6361e+00,  1.1372e-01, -1.6591e+00,
        -4.7938e-02, -1.0585e+00, -1.7527e+00, -1.7523e+00,  1.6516e-01,
         4.0093e-01,  4.2202e-01, -2.6317e-01, -2.8638e-01, -1.7535e-01,
         1.7167e-01, -4.5929e-01, -1.0999e-01, -2.1482e-01, -3.3505e-01,
        -4.7892e-01, -9.1196e-02, -1.5438e-01, -4.3137e-01,  1.0829e+00,
        -2.6581e-01, -3.2300e-01,  1.0820e-01, -4.5802e-01, -2.3413e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9259e+00,  6.3033e+01, -1.1425e+00, -7.8583e-01,  2.9506e+00,
        -6.9964e-01,  6.3406e-01, -6.1758e-01,  6.4869e-01,  4.2002e-01,
        -2.6740e-01, -1.0001e+00, -2.7754e-01, -8.9861e-02,  3.1355e-02,
         2.3061e-01,  1.4562e+00, -6.9079e-01,  5.8650e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 3.0868e-01,  3.1874e+01, -6.7197e-02,  3.4171e-01, -7.8380e-02,
         1.1792e-01,  5.7050e-01, -3.0627e-01,  2.3300e-01, -2.9342e-02,
         1.7808e-01, -2.0007e-02,  1.1717e-03,  2.1373e-01, -6.4519e-01,
        -1.3082e-01,  3.8083e-01,  9.0449e-01,  8.0408e-02,  1.8730e-01,
         7.4152e-02, -7.6467e-02,  5.4347e-01,  3.1636e-01, -2.5732e-01,
        -3.4183e-01, -6.7740e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5509, 48.1558, -1.1016, -3.5291,  1.1971, -0.6748,  0.3182,  0.5427,
         0.7049,  0.5212,  0.3372,  0.9926,  2.7102,  0.2044,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2980e+00, -2.3416e+01,  4.8070e-01,  7.9303e-01,  8.1911e-02,
        -2.3521e-01,  6.9580e-01, -1.2670e-01, -3.5570e-02,  1.1921e-02,
         5.2850e-02,  4.9395e-01,  6.9987e-02, -2.5662e-01,  1.8556e-01,
         5.1384e-01,  3.0485e-01, -4.9404e-03,  1.9147e-01,  1.0809e-01,
         1.5225e-01,  3.4791e-02,  2.4831e-01, -1.6724e-01,  2.1489e-01,
         1.0525e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4227e-01,  1.4554e+01,  7.3598e-01,  2.2213e-01,  2.4502e-01,
        -1.1150e+00,  3.9349e-01,  2.4723e-01, -2.7999e-01,  1.1681e-02,
        -1.8427e-01,  4.6512e-02,  3.6988e+00,  4.0629e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4868e+00,  3.4257e+01, -2.4682e+00, -4.2913e-01,  3.7669e-02,
        -3.3749e-01,  4.2542e-01, -2.7145e-01,  1.2170e-01,  2.2322e-01,
         2.7791e-01, -4.2911e-01,  5.2753e-02, -6.1158e-01,  7.5874e-02,
         3.9311e-02, -4.5096e-01, -3.2334e-01,  1.9216e-01, -5.3819e-01,
         5.6777e-02, -2.7716e-01,  2.3873e-01,  2.1727e-01,  4.8936e-01,
        -1.5136e-01,  1.1631e-01,  3.2961e-02, -5.4102e-01, -1.3424e-02,
         9.1061e-01,  1.7066e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6201, 21.6283,  0.5020, -2.0133, -0.5011, -0.2497, -0.3506, -0.4908,
         0.0814, -0.3973, -0.1768, -0.0523, -0.2534,  0.0574, -0.1742,  0.2285,
        -0.1103, -0.5701,  0.2065,  0.0763, -0.2668, -0.3260, -0.0882, -0.4379,
        -0.6132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5498,  5.7274,  1.1483,  0.6542,  0.2114, -0.4595,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5711e+00,  5.4194e+01,  2.8399e+00, -4.2669e-01,  1.2118e-01,
        -1.5475e-01, -3.5726e-01, -1.0900e-01,  3.4848e-01,  2.3652e-01,
        -2.8321e-01,  5.3263e-01,  8.8467e-03,  4.5935e-01, -1.1618e+00,
        -3.2346e-02, -5.2354e-01, -4.4759e-02, -4.8579e-02,  4.6025e-01,
        -7.2709e-01, -3.3328e-01, -7.4913e-01, -4.2887e-01, -8.1271e-02,
        -1.9892e-01, -3.3368e-01, -5.9236e-01,  1.4881e-02,  4.8921e-01,
         3.2476e-01,  2.2099e-01, -2.5100e-02, -2.3695e-01,  1.5427e-01,
        -6.3756e-01,  3.2573e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3624e-01, -3.1804e+01, -1.2339e+00, -2.2788e+00, -4.5941e-01,
        -8.1304e-01, -2.8304e-01, -2.3978e-01,  1.3657e-01,  1.5142e-01,
         2.7970e-01,  2.6234e-02,  2.3604e-01,  1.1868e-01, -2.7501e-01,
        -3.3898e-01,  1.3606e-01, -1.5355e-01,  3.3001e-02,  3.1568e-03,
         3.2863e-01,  1.5016e-01,  5.6769e-01,  7.7913e-02, -7.7272e-02,
        -9.9878e-02, -4.0436e-01, -1.0874e-01, -4.4066e-02,  6.5051e-02,
        -2.3359e-02, -6.3767e-02,  4.7742e-02, -7.8117e-02,  6.1294e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1005, 28.7662, -1.9354,  2.1371,  0.6694, -0.1887, -0.2477,  0.2424,
        -0.1666,  0.1272,  1.0152, -0.0458,  0.6098, -1.3786,  1.1717,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0450e-01, -7.8012e+00, -2.1736e-01,  3.4718e-01, -1.0524e-01,
         1.1652e-01, -1.0890e-01, -5.4132e-02, -1.6116e-01,  4.1809e-02,
        -3.2076e-03,  1.7795e-02,  5.3708e-02,  1.0594e-02, -5.7492e-02,
         1.0132e-01, -8.3607e-02,  1.2701e-05, -2.2321e-02, -4.7701e-02,
         2.0408e-02, -4.2636e-03,  3.0068e-02, -8.2712e-02, -1.0950e-01,
        -5.0607e-02,  5.9308e-02, -4.4223e-02, -3.8893e-03, -2.3614e-02,
        -2.0430e-02,  5.5871e-02, -4.7712e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.2483, -26.5213,  -0.2856,   0.0747,   0.0268,   2.3467,   0.2022,
         -0.1940,   0.8172,  -0.3609,  -0.6245,  -1.2139,  -0.3001,  -0.2211,
          0.4882,   0.4112,   0.2866,   0.1407,   0.8587,  -0.1769,   0.6027,
         -0.0856,   0.0273,  -0.1276,  -0.0993,   0.3728,   0.5458,  -0.6936,
          0.3834,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 2.7685, 24.1766, -0.7540, -0.2432, -0.5126, -0.4085,  0.1851, -0.3965,
        -0.1786, -0.8512, -0.1238, -0.0870,  0.2415,  1.5157, -1.7920,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3388e+00, -6.0610e+01, -9.6049e-01,  6.3011e-01,  4.7878e-01,
         1.0012e+00,  1.2404e-01, -4.2568e-01, -5.2831e-01, -4.7587e-03,
         3.4498e-01,  4.6938e-02,  1.4995e-01,  7.8112e-02, -2.5050e-02,
         3.8106e-01,  4.6240e-01,  6.9866e-02,  1.6457e-01,  1.5637e-01,
         2.2038e-01,  9.3495e-02,  2.7037e-01, -5.7631e-02,  2.2884e-01,
        -1.7195e-01,  1.6293e-01, -5.4580e-01,  2.9792e-01,  1.8483e-02,
        -1.0030e-01,  3.0774e-01,  5.9945e-01,  1.8582e-01,  1.5364e-01,
        -3.3494e-01, -1.1649e-01,  3.2327e-01,  6.3784e-02,  1.1367e-01,
         2.1903e-01, -1.2833e-01, -3.3168e-01,  2.9928e-02,  1.0409e-01,
         1.1836e+00,  1.7139e-01, -2.9209e-01, -4.2766e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7350e-02,  3.6506e+00, -2.6177e-01,  1.5557e-02,  1.2643e-01,
         3.6186e-02, -5.0855e-02,  7.9131e-02,  1.9025e-02,  1.7290e-02,
        -3.3972e-02, -3.4435e-02, -7.1624e-03,  2.8373e-02,  1.0389e-02,
        -7.8860e-03,  7.3547e-02, -4.4413e-03,  3.1042e-02, -5.4015e-03,
        -2.8853e-02,  2.7725e-02,  1.4915e-02,  2.6961e-03,  4.7933e-03,
         1.3191e-02, -4.5851e-03,  2.6786e-02, -8.4942e-03, -1.7690e-02,
         2.4611e-02,  2.2000e-02,  1.1618e-02, -6.0801e-02, -6.2140e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1235, 35.3389, -2.5005, -0.9562,  1.1137, -0.4240,  0.2244,  0.0568,
         0.1371, -0.0740,  0.2155, -0.1256, -0.5283, -0.1978,  0.5693,  0.5516,
        -0.0934, -2.4008,  0.7885,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2016e-02, -1.3683e+01,  3.4507e-01, -4.1279e-01, -5.1439e-02,
        -2.9138e-01, -1.1693e-01, -1.2783e-02,  1.9045e-01,  1.5607e-01,
         3.4693e-01,  1.5058e-01, -4.9445e-01, -3.7687e-01,  1.3106e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3441e+00, -5.2764e+01,  4.0655e-01,  2.2344e-01, -2.5723e-01,
         2.2062e+00,  2.3748e+00, -2.7762e-01,  1.1181e+00, -4.5117e-01,
         5.7796e-01,  1.1178e-01,  1.2426e+00,  5.3014e-01,  7.3366e-03,
         9.5538e-02,  3.0748e-01,  1.8863e-01, -9.7318e-02, -1.2304e-01,
         1.4419e+00,  5.0148e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9207e+00,  2.2325e+01,  5.1623e-01,  1.0178e-01,  2.8410e-01,
         7.8970e-02, -7.7124e-01,  5.6181e-02,  8.3174e-02,  4.3254e-01,
         6.2178e-02,  3.2921e-01,  2.9341e-01, -4.0683e-01, -2.9687e-02,
        -4.6834e-02, -5.9934e-02,  1.9979e-01,  8.5806e-02, -1.7819e-02,
         1.8987e-01, -5.4201e-02,  2.2781e-01,  4.9648e-01,  4.5583e-01,
         1.4941e-01,  1.5003e-01, -3.8655e-01, -1.3971e-01,  9.5321e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5573, 36.8706, -0.1498, -2.0693,  1.2414, -0.1086, -1.5199, -0.4596,
         3.6558, -0.3267,  0.6460,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2163e+00, -6.4888e+01, -1.2657e+00, -3.6276e-01, -4.2573e-01,
        -1.2603e+00,  1.1032e+00, -3.8290e-01, -1.4523e-01, -2.7031e-01,
         1.0205e+00, -8.6128e-01, -5.9981e-01, -3.1094e-01, -2.8330e-01,
        -1.9126e-01, -2.2679e-01,  3.6898e-02, -1.9865e-01,  6.3531e-01,
        -4.2663e-02, -1.0966e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2384e+00,  3.6752e+01,  2.4842e+00, -1.4656e+00,  5.8158e-01,
         3.3386e-01, -3.7922e-01,  7.3829e-02, -4.0777e-01, -4.4571e-01,
        -1.2216e-01, -5.4851e-01, -5.8855e-01, -2.9310e-01, -2.6680e-01,
        -5.5619e-01,  1.4679e-02,  5.7088e-02, -2.5138e-01, -8.8170e-02,
         1.7903e-02, -2.0654e-01, -9.1682e-01, -4.8607e-02, -3.4036e-02,
        -1.5006e-01, -1.3983e-01, -2.1940e-01, -5.4374e-01, -5.1568e-02,
         1.4573e-01,  5.9395e-02,  5.0168e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0613e+00,  8.1735e+01,  8.1617e+00, -5.1782e-01, -1.2558e+00,
         2.5219e+00, -4.4758e-01, -3.6851e-01,  4.9896e-01, -5.4676e-02,
        -7.7010e-01, -4.9442e-01,  4.8724e-02, -1.6996e+00, -2.5594e+00,
         5.6916e-02,  3.5925e-01,  9.6112e-01, -3.6704e-01, -2.6499e-01,
         1.4998e+00, -9.9637e-01, -3.2339e-01, -6.4567e-01,  5.5576e-01,
         7.1057e-01,  1.0516e+00, -2.3402e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0524e+00, -3.2573e+01, -2.3239e+00, -8.7445e-01,  2.8338e-01,
        -5.0501e-01,  2.8464e-01, -4.2192e-02,  6.9847e-01, -6.8174e-02,
        -2.8778e-02, -3.4190e-02, -8.8711e-03,  2.1401e-01, -5.8837e-02,
         1.8393e-01, -1.9796e-01, -1.2830e-01,  1.9814e-01, -4.1286e-01,
         1.0185e-01,  3.0376e-03,  7.8368e-02, -8.6848e-01,  1.4930e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-9.8541e-01,  5.1585e+01,  1.3156e+00,  4.6248e-01,  3.3184e-01,
         6.8993e-01,  1.9644e-02, -5.2231e-01,  2.4525e-01, -1.0867e-01,
        -3.9952e-01, -6.0658e-03, -2.9014e-01, -4.4033e-01,  5.6419e-01,
        -3.4078e+00, -4.6283e-01,  3.0508e-02, -3.0145e-01,  1.6522e-01,
         2.6820e-02, -3.7433e-01, -1.3714e-01,  3.2220e-01,  9.5543e-01,
         1.5281e-01,  2.5632e-01,  3.5597e-01, -6.7034e-01, -5.4287e-01,
        -2.9580e-02, -2.1836e-01,  1.2166e-01, -4.8978e-01, -2.4958e-01,
         1.6165e-01, -4.7596e-01,  3.2082e-02, -1.7444e-01,  1.6778e-01,
         2.1712e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.3886, -21.7833,  -2.0701,   0.3027,  -0.9754,  -0.2074,   0.0398,
         -0.0740,   0.2049,   0.1864,   0.0877,  -0.1482,  -0.2777,   0.0730,
         -0.1207,   0.2726,  -0.0668,   0.0987,   0.1702,  -0.0896,   0.0489,
          0.6898,   0.1106,   0.7341,   0.8542,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.5523, -27.6602,  -0.2832,   0.8970,  -0.8434,   0.2704,  -0.4565,
         -0.1728,   0.8247,   0.7723,   0.4401,  -0.3922,   0.5214,   0.2350,
          0.5240,  -0.1306,  -0.7432,   0.5100,   0.1447,  -1.2668,  -0.3933,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.6897, -48.4713,  -0.3925,  -0.5779,   0.6695,  -0.3983,  -0.4628,
          0.6892,   0.1166,  -1.5413,   0.4951,   0.9008,  -0.1379,   0.1430,
          0.3718,   0.3773,  -0.9020,   2.2607,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0351e+00,  1.1928e+01,  1.0692e+00,  1.3025e-02,  3.5903e-01,
         2.3145e-01,  1.2300e-01,  3.9719e-01, -1.5697e-01, -4.3795e-03,
         3.5488e-02, -1.8655e-01, -1.0289e-02,  6.0436e-03,  5.9545e-03,
        -2.1927e-02, -2.8436e-01, -8.4931e-02, -6.7231e-02, -2.7883e-02,
        -2.2139e-01, -1.1793e-01, -6.0828e-02, -1.3958e-02, -1.8579e-01,
         7.4328e-02, -1.3327e-01, -2.2727e-04, -6.2164e-02, -1.3554e-01,
        -5.1600e-02,  3.3875e-02, -3.3506e-02, -4.1997e-02, -1.2979e-01,
         4.5179e-02, -2.0143e-01, -8.9594e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7160e+00, -3.5641e+01,  3.2564e-02,  9.8507e-01, -1.1008e+00,
        -4.2594e-01,  1.5165e-01,  4.9614e-01, -3.1940e-02, -3.9994e-01,
         1.3378e-03,  1.2230e-01, -4.3997e-02, -2.0370e-01,  1.7427e-01,
         3.1292e-03,  7.3532e-01,  3.9601e+00, -5.0074e-01,  8.7885e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3330e+00, -6.3125e+01,  6.4268e-01, -1.2596e+00, -1.6160e+00,
        -7.3871e-01, -8.7730e-01, -1.9034e+00, -4.9955e-01,  2.7906e-02,
        -9.5711e-01,  3.8480e-01, -8.9566e-01, -4.0547e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7960e-03, -2.2698e+01,  1.1180e+00,  7.4951e-01, -7.3520e-01,
        -3.0656e-01,  5.2073e-01,  1.9678e-02, -2.0372e-01, -3.3694e-01,
        -2.1180e-01, -1.9429e-01, -4.0782e-02,  5.6047e-02, -3.6176e-02,
         3.3996e-01, -1.7809e-01,  6.2384e-02,  3.7447e-02, -6.9842e-02,
        -2.6028e-01, -4.1736e-01, -1.3386e-01, -4.8264e-01,  4.6098e-02,
         2.1302e-01, -5.3896e-02, -1.2303e-01, -3.4203e-02, -1.7654e-01,
        -1.3016e-01,  5.5268e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.7566, -37.1377,  -2.4144,  -3.2815,  -0.5589,   0.7924,  -1.2447,
         -0.1903,   0.3014,   0.5477,   0.1428,   0.5131,   0.0437,  -0.0602,
         -0.5514,   0.4105,  -0.0408,  -0.2527,   1.2667,   1.6873,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7293e-01,  1.4051e+01,  1.6330e-01, -5.4946e-01, -1.1783e-01,
        -7.7016e-03, -3.0804e-02, -1.5356e-02,  1.0649e-01, -2.6230e-01,
         1.6357e-01, -8.1619e-02, -2.2380e-01, -9.7334e-02, -2.7602e-02,
        -2.3686e-01, -3.1237e-01,  9.1204e-02, -1.9056e-01, -8.2454e-02,
        -3.8633e-02, -1.7619e-01, -1.1823e-01, -2.1618e-01,  8.1769e-02,
        -7.4554e-02, -1.6685e-01, -3.5378e-01,  8.9078e-02,  2.0718e-03,
         2.6162e-01,  5.9570e-02,  4.6110e-01,  9.9057e-02, -5.1896e-02,
         2.8520e-02,  2.4937e-02, -1.7328e-02,  6.8835e-03,  4.3929e-02,
        -2.4857e-01,  3.0664e-01,  5.3011e-02,  1.7640e-01, -3.0672e-01,
        -1.8638e-01, -3.2472e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9588e+00, -5.6673e+01, -4.0764e-01,  1.3263e+00, -3.2544e+00,
         1.4058e-01, -7.2247e-01,  8.8651e-01,  1.1779e+00, -2.9841e-02,
         1.6101e+00, -1.2570e+00, -6.1846e-01,  3.0093e-01, -1.5964e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0330e-01, -4.4209e+01,  1.1260e+00,  7.2732e-01,  6.4344e-01,
         1.2140e+00, -7.1451e-01, -1.6352e+00,  4.6133e-01, -2.3280e-01,
         6.6852e-01,  2.0921e-01,  1.0714e+00, -8.8702e-01,  8.0625e-01,
        -4.4697e-01, -4.5223e-01, -1.1541e-01, -3.7886e-01,  4.3506e-01,
        -3.2493e-01,  2.0337e-01, -4.8906e-02, -8.7848e-01, -2.5507e-01,
         3.6296e-01, -1.4833e-01,  2.5746e+00, -6.0637e-02, -4.9269e-01,
         2.7711e-01,  7.5063e-01,  5.6431e-01,  3.1264e-01,  1.9576e-01,
        -2.8783e-01,  8.5535e-02,  1.0197e-01,  1.0870e+00,  7.8056e-02,
        -2.4473e-01,  3.9079e-02,  1.2696e-01,  5.6081e-02,  9.2999e-02,
         4.2084e-01,  4.0260e-01,  2.8935e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-3.0977e-01, -3.5029e+01,  3.6052e-01, -1.6243e-02,  4.6438e-01,
         5.8991e-01, -1.4407e+00,  2.6153e-01,  3.6766e-01,  1.2580e-01,
         1.3849e-01, -2.5354e-02,  4.6505e-01,  1.3605e-01,  5.3308e-01,
         5.5839e-02,  1.6029e+00,  5.3158e-01,  1.3286e-01,  3.7405e-01,
         4.4466e-01, -3.4233e-01, -5.1221e-01, -7.2315e-01,  1.1747e-01,
         1.1286e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3466e-01, -4.5231e+00, -7.4780e-02, -1.1664e-01, -4.0567e-02,
        -1.5051e-01,  7.3777e-02, -4.4578e-02, -7.2991e-02, -3.4318e-02,
        -4.3796e-03,  2.1427e-02,  2.9419e-02, -1.4380e-02,  3.6035e-02,
         2.2764e-02, -6.4162e-02,  2.8382e-02,  5.7834e-03, -9.9606e-02,
         1.0287e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5160e-01,  1.1450e+01,  1.0186e+00,  1.0050e+00,  1.3445e-01,
        -2.0503e-01, -2.1894e-01,  1.3223e-02, -7.1988e-02, -1.2199e-01,
        -2.4630e-02, -3.4645e-02, -1.2628e-01, -4.8464e-01, -4.1454e-01,
         4.1966e-02,  2.1543e-02, -4.4977e-01,  2.2032e-01, -2.0814e-01,
         3.5011e-02,  6.5763e-02,  2.6701e-01, -1.0904e-01,  5.6821e-02,
        -6.2615e-03, -4.5019e-02,  3.6081e-02, -4.9646e-02, -5.3878e-02,
         4.2411e-02, -9.3507e-03, -2.4775e-02, -6.8671e-02, -3.7343e-01,
        -3.6965e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5477e+00,  2.5343e+01, -2.3616e+00,  5.0531e-01, -5.3251e-01,
        -1.1650e-01, -9.1304e-01, -6.0217e-03, -1.3553e+00, -1.0842e-01,
         4.0087e-01, -6.3341e-01, -6.1631e-01, -5.8452e-01, -1.9859e-01,
        -4.4358e-01,  9.6340e-03, -2.6273e-01,  6.6054e-01, -7.2326e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6215e+00, -3.8507e+01, -3.1499e+00,  2.6768e-01, -8.5239e-01,
         1.1273e-01, -2.5034e-01,  1.4666e+00, -2.5842e-02, -1.3650e-01,
        -7.4027e-01,  4.4675e-01,  6.7636e-01,  1.6999e-01, -5.3402e-01,
        -3.7365e-01,  9.2460e-01,  2.2441e-01,  8.0190e-01, -4.3554e-01,
         3.7599e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6640e-01, -8.8503e+00, -3.6790e-01, -8.8524e-02,  1.7041e-02,
         5.2317e-02, -3.8919e-02, -2.7675e-02, -1.4281e-01, -1.4164e-05,
        -1.6782e-01, -4.1414e-02, -2.1773e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4480e+00, -4.7160e+01, -4.2123e-01, -2.4499e-01,  5.2460e-01,
         6.6361e-01,  3.7538e-01, -6.6845e-01,  2.1724e+00,  4.2274e-01,
         8.1602e-01,  1.7716e-01, -1.5201e-01,  7.3252e-02, -7.6134e-01,
        -1.0871e+00, -4.3054e-01, -4.6664e-01, -7.8406e-02,  6.8657e-01,
        -8.4627e-01, -4.7814e-01,  4.9565e-04,  3.3916e-01, -6.7552e-02,
        -3.9760e-01,  9.6580e-01, -3.3037e-01,  1.7949e-02,  1.8551e-01,
         2.5014e-01, -2.5093e-01, -4.3187e-01,  1.3185e-01, -6.7503e-01,
        -2.5628e-01, -8.2395e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7803e+00,  5.4660e+01,  2.0694e+00, -5.6075e-02, -1.2520e+00,
         9.9194e-02, -1.2350e-01, -2.1531e-02, -3.0215e-01, -6.8537e-01,
        -8.7330e-01, -2.0125e-01,  1.2032e+00,  2.2036e-01, -8.5816e-01,
         6.0122e-02,  1.4307e+00,  7.3441e-01, -1.3446e-01, -1.8554e-01,
        -7.1945e-01,  7.3516e-01, -4.4323e-01,  2.2922e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5015e+00,  5.0988e+01,  3.5772e+00,  4.6446e-01,  1.2327e+00,
         2.2635e-01, -3.6322e-01,  5.4397e-02, -4.7717e-01, -2.7830e+00,
        -4.7130e-02, -2.0258e+00, -3.5790e-01,  1.1129e-01, -1.8584e-01,
        -4.0118e-01, -1.2745e+00,  5.1690e-01,  4.2684e-01,  4.1341e-02,
         1.7997e-01,  2.4578e-01,  1.0760e-01, -3.6152e-03, -1.5027e-02,
        -1.3828e-02, -1.6668e-01,  1.7496e-01,  4.2603e-01,  3.7279e-01,
        -1.0074e-01, -1.3640e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7434e+00,  4.3049e+01,  2.0532e+00, -8.1950e-01,  7.7663e-01,
        -1.4413e+00, -4.3911e-01, -9.9237e-01,  5.0834e-01,  6.9563e-01,
         1.0091e-01,  1.9199e-01, -2.2066e-02, -7.8829e-02,  1.2747e-01,
        -1.9845e-01, -3.7106e-01, -1.1631e-01,  5.7212e-01,  1.5382e-01,
        -5.7755e-02,  7.5178e-01, -6.5789e-01, -8.8594e-02,  7.6333e-01,
         9.5886e-01,  1.1890e+00, -3.0927e-01,  5.3043e-02,  1.8448e-01,
         9.5541e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0235, 74.7996, -3.9788,  1.4108,  0.5823, -0.7327, -1.2124,  0.2014,
        -0.3838,  0.9098,  1.4255,  0.1438, -0.2723,  0.1434,  0.7283,  0.5064,
         0.8799, -0.2916, -0.6630,  0.3348,  0.1207,  1.0804,  1.1206,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2387e+00, -6.1695e+01, -2.5230e+00,  1.1604e+00, -1.1931e+00,
         4.9415e-01,  9.3920e-01, -2.9718e-01, -4.4707e-01,  7.4603e-02,
         2.0373e-01,  1.6963e+00,  7.9570e-01, -7.4050e-01,  3.1934e-01,
         5.3981e-02,  5.4412e-01,  4.9914e-01,  1.3817e-02,  3.2351e-02,
         5.2074e-01,  6.4368e-01, -1.0133e-01, -5.6899e-01,  3.3338e-01,
         9.2262e-02, -1.7930e-01,  8.8278e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.1934e+00,  1.5899e+01,  6.8760e-02,  7.1826e-02,  1.5155e-01,
         3.2508e-01,  5.0326e-02, -1.8518e-01, -9.2883e-02, -3.7386e-01,
         5.8245e-01, -1.6968e-01, -2.2681e-02, -1.9224e-01, -1.8538e-01,
        -1.8814e-01, -5.8404e-02,  2.1444e-01,  3.3267e-02,  9.2945e-03,
         2.5862e-01,  2.6574e-02, -1.4917e-01, -8.5396e-02,  2.0264e-02,
         5.2869e-02,  1.5993e-01, -6.1095e-02, -1.7538e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9065e-01, -1.6947e+01,  5.2866e-01,  8.3913e-03, -1.6338e+00,
        -2.3032e-01,  1.2916e-01,  8.8637e-02, -7.5026e-02, -1.7102e-01,
         3.2350e-01,  1.9129e-02,  2.2974e-01,  8.2605e-02, -1.4291e-01,
        -7.3055e-02,  9.0806e-03, -1.5393e-01, -4.3398e-02,  1.3940e-01,
        -2.3119e-02, -9.4200e-02,  1.8840e-01,  3.8072e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2092e+00,  1.9086e+01,  3.7822e-01, -1.0074e-01,  7.5932e-01,
         3.6725e-01, -1.8758e-01,  1.0489e-02,  1.7907e-01, -1.6284e-02,
        -4.3929e-01,  4.9971e-02,  3.1003e-02, -2.3100e-01,  3.2722e-03,
        -2.0218e-01, -4.0133e-01, -1.3304e-01, -1.7134e-01, -3.1416e-01,
         1.2835e-01,  1.0996e-01, -7.7516e-02,  1.6898e-01,  1.5797e-01,
        -8.4712e-02, -3.1669e-01,  4.7076e-02, -2.5733e-02, -9.3890e-02,
        -4.8511e-01,  3.9302e-01,  3.6166e-01, -9.0479e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3496e-02, -1.8045e+01,  8.2299e-01, -1.8009e-01, -1.7085e-02,
        -5.4768e-03, -4.4232e-01, -2.7849e-01,  1.1416e-01, -1.4940e-01,
         2.3109e-01,  1.6032e-02,  2.2853e-02,  1.6279e-01,  5.1735e-02,
        -5.3298e-02,  3.9815e-02,  1.3477e-01,  2.2663e-02,  1.6969e-01,
         1.7789e-01, -4.0625e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4640e+00,  2.9877e+01,  8.6409e-01, -7.1018e-01,  2.0050e-01,
         3.1085e-02,  7.3158e-02,  8.8586e-02, -1.6716e-01,  4.3396e-01,
         5.5608e-01, -2.9311e-02, -4.8119e-01, -4.1401e-01, -7.8960e-01,
        -1.1982e+00, -4.2472e-01,  8.1940e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9246, 28.8937, -0.2854,  0.3744, -0.2133,  0.7714, -0.2244,  0.0528,
        -0.3212,  0.2969,  0.3310,  0.0685, -0.2354, -0.3206,  0.0658, -0.0526,
         0.0775, -0.0601,  0.0321,  0.1639,  0.1011, -0.1632, -1.0925,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8885e+00, -6.5509e+01, -1.2813e+00,  2.4703e-01, -1.0160e-01,
         1.8162e-01,  2.8780e-01,  6.6761e-01,  1.1071e+00,  9.1571e-01,
         1.4110e+00,  3.5136e-01,  7.2496e-01, -5.1765e-01, -4.0298e-02,
         1.1130e+00,  3.5361e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3280e+00,  4.7130e+01,  1.7049e+00,  1.0442e-02, -1.7641e-01,
         7.4959e-01,  7.9129e-01, -1.7339e+00, -1.4489e-01,  4.8744e-01,
        -3.3689e-01, -6.9658e-01,  5.1186e-01,  9.9242e-01,  3.5919e-01,
         2.8749e+00, -3.6338e-01,  5.4674e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.9709, -29.3644,  -0.4439,   0.5097,   0.6927,   0.3007,   0.0344,
         -0.1412,   0.0550,  -1.4839,   0.6362,   0.5300,  -0.1895,   0.1425,
          0.2839,  -0.8947,   0.2465,  -0.2475,  -0.8886,  -0.3338,   0.0988,
         -0.2299,   0.0875,  -0.1636,   0.1401,  -0.8202,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3251e+00,  5.8093e+01,  5.2473e+00, -1.2230e+00, -9.4273e-01,
        -2.5197e+00,  6.4521e-01, -2.2431e-01,  1.3750e-03, -6.5059e-01,
        -2.6965e-01, -1.5315e-01, -4.2402e-01,  3.3541e-01, -3.1076e-01,
         7.0931e-02, -2.6359e-01,  1.0051e-01, -2.7347e-01, -4.8681e-01,
        -2.3754e-01,  6.4427e-01,  3.6002e-01,  7.2631e-04,  1.3580e-01,
         1.1205e-01, -3.2044e-01, -2.5364e-02,  1.2201e+00,  2.2299e-01,
        -2.1534e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.2210, -42.4156,  -2.2670,   0.8709,   0.1686,  -0.5026,   0.5210,
          0.1520,  -0.1707,   1.3276,   0.4377,   0.2095,  -0.8040,   0.0826,
          0.8473,   0.2651,  -0.1133,   0.7478,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6181, 15.5033, -0.6093, -1.1518, -0.6913,  1.4295, -0.4096, -0.1558,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 4.7371e-01,  8.9958e+00,  1.4906e-01, -2.9977e-01, -1.2336e-01,
         2.9613e-01,  5.2992e-02,  9.8843e-02, -4.2199e-02,  9.9470e-02,
        -1.2390e-02,  2.0632e-02,  4.8772e-02,  2.8336e-02,  2.2748e-02,
         4.0470e-02,  6.1842e-02,  3.0446e-02,  1.3519e-03,  3.5997e-03,
        -3.8435e-02, -4.4029e-02, -8.5752e-02, -4.9076e-02, -4.5229e-02,
        -2.1666e-02, -6.2108e-02,  4.9127e-02, -6.7339e-03,  1.9593e-02,
         8.0066e-02, -1.2311e-02,  4.5904e-02,  7.2157e-02,  1.3952e-02,
         1.0659e-01, -6.2068e-02,  5.6293e-02,  4.2795e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5564e-01, -2.8195e+01,  2.0494e+00,  1.9972e+00, -6.0339e-01,
         3.9513e-01,  2.5379e-01, -2.4280e-01,  1.7603e-01,  1.4894e-01,
         3.1329e-01, -2.7356e-01,  1.3324e-01,  6.0883e-01,  8.3819e-02,
         2.4883e-02, -2.7143e-01,  7.1125e-02, -6.5800e-03, -2.6473e-01,
        -1.6267e-01, -6.4298e-02, -1.3809e-01,  2.5727e-01, -8.6126e-02,
         1.9633e-01,  1.0973e-01,  8.1923e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.5627, -10.4167,  -0.1568,  -0.1995,   0.1679,   0.2850,  -0.3217,
         -0.3145,   0.0596,  -0.0278,   0.1256,   0.0672,  -0.2155,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -4.6283, -33.7503,   1.6887,   2.0500,   0.8989,   1.3428,   1.2885,
          0.1775,   0.2492,   0.0528,   0.9476,   0.1432,  -2.7828,   3.5246,
         -0.8119,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9143, 24.7109, -1.2347, -0.5050,  1.0718,  0.5933, -0.9409, -0.4765,
        -0.5967, -0.1530, -0.1853,  0.4358,  0.0992,  0.0784,  0.1199,  0.0733,
        -0.1036, -0.4334,  0.2763, -0.1561,  0.2731,  0.0502, -0.0818, -0.1046,
        -0.2663, -0.0400,  0.0741,  0.2629,  0.0934,  0.1044, -0.3430,  0.1371,
         0.2810,  0.2686, -0.0387, -0.3229, -0.0913,  0.0691,  0.0590, -0.0570,
        -0.3387, -0.1532,  0.2505, -0.0311,  0.0353,  0.1395, -0.9889],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3204e-01,  6.6554e+01,  1.9400e+00,  1.5177e-01,  2.3788e-01,
        -2.5441e+00,  2.6929e-01, -5.2284e-01, -2.0004e+00,  5.2138e-01,
         3.7876e-01,  1.5195e+00, -2.7695e-02, -1.3057e-01, -5.9575e-02,
        -2.7174e-01, -3.3850e-01, -3.9798e-01,  2.3621e-01,  3.5051e-02,
         3.5266e-01,  4.0960e-02,  3.0368e-01,  5.1831e-01,  1.1688e-01,
         2.9375e-01,  9.5847e-01, -3.0411e-02, -7.2338e-02,  3.5366e-01,
         1.8627e-01, -8.3160e-02, -4.0309e-01, -6.3589e-03, -1.6836e-01,
        -4.7937e-02, -1.2782e-01,  2.0128e-01,  1.4850e-01, -1.1873e-01,
        -5.8054e-01,  2.1175e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9821e-02,  1.6524e+01,  1.5473e+00, -6.1839e-01, -1.1228e-01,
         5.5121e-02,  1.9868e-01, -1.4744e-01,  1.4100e-01,  3.6507e-01,
         1.9532e-02,  5.3582e-02, -2.4666e-01,  1.1363e-01,  2.3693e-01,
        -3.3136e-01,  6.5703e-01, -6.5739e-02,  1.3929e-01, -1.3817e-03,
        -1.2132e-01, -7.5165e-02, -7.8026e-02, -5.3316e-01, -1.0898e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8950e-01,  6.5612e+00,  7.1560e-02, -5.4259e-02, -1.6673e-01,
         5.0142e-02,  2.5424e-01,  1.1396e-01,  1.1152e-01,  1.2235e-02,
         1.2008e-02, -6.8867e-02,  2.8346e-02,  3.1364e-04, -7.1621e-02,
        -1.5114e-01,  1.1995e-01,  1.5261e-01, -1.5136e-01,  1.4959e-01,
         1.0386e-01,  8.2712e-02, -5.5843e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0287e-01,  1.3875e+01,  9.9270e-01, -3.7753e-01,  2.6966e-01,
         1.8326e-01,  7.4120e-02,  2.2537e-01, -2.9671e-01,  5.5603e-01,
         1.6412e-02, -6.5055e-02,  2.2949e-03, -1.2279e-01, -4.9369e-02,
        -1.6574e-01, -3.3052e-01, -1.8725e-01,  1.2194e-01, -1.4812e-01,
         1.4237e-01, -1.8940e-01, -6.8876e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5281e+00, -2.4655e+01, -4.6174e-01,  6.8474e-01, -1.9557e-01,
        -5.5481e-01, -4.2526e-01, -1.8232e+00, -5.3504e-01, -2.1350e-02,
         2.3113e-02, -1.4390e+00,  2.3461e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9549e-01, -3.3038e+01,  1.0497e+00,  1.7845e+00,  8.6382e-01,
         1.1446e-01,  1.6180e-01,  7.3239e-01,  9.8051e-01,  3.0477e-01,
         4.9506e-01,  4.0381e-02,  1.5589e-01, -6.4554e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5364e-02,  1.0000e+01,  3.7051e-01, -2.4834e-01,  2.5924e-02,
         2.5397e-02, -6.4221e-02,  2.7938e-02, -9.7710e-03, -3.6807e-02,
         3.2507e-02, -2.5609e-02,  2.0842e-01,  6.5558e-02,  1.4349e-01,
         4.8042e-02,  1.2172e-01,  3.0872e-01,  1.8391e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.6269e+00, -3.9785e+01,  6.4982e+00,  7.0436e-01, -1.2123e+00,
        -1.1709e+00, -1.1928e-01,  2.4555e-02, -3.2568e-01, -5.1444e-01,
        -1.1415e-01,  3.0926e-01, -5.7650e-02,  3.9564e-01, -2.6820e-01,
         3.0316e-01, -1.2510e-01, -3.1327e-01, -2.9471e-01, -7.4498e-01,
         6.4056e-02, -1.4446e-01,  3.0583e-01, -6.3093e-02,  4.2511e-02,
         4.6387e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -5.4050, -50.6508,  -0.5683,   0.8699,  -2.6383,  -0.5224,   0.5734,
         -0.5214,   0.3718,   0.1075,   0.2496,  -0.2638,  -1.7860,  -0.5610,
          0.9663,   1.1865,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.3099, -63.3834,  -3.8046,  -4.9816,  -0.6237,  -3.1983,  -0.7118,
         -1.5788,  -0.8156,   0.5476,  -0.4290,   1.6684,   0.1821,  -0.3512,
         -0.6971,  -0.5716,  -0.5567,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0962, 27.1650, -0.3338, -0.7886,  1.7174,  0.1606,  0.4769,  0.5925,
         1.2256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1873e+00, -2.0947e+01, -5.8023e-01,  5.2076e-01,  7.4325e-02,
         2.6875e-01, -6.3222e-01, -8.5519e-01,  1.7265e-02, -5.0026e-01,
        -1.8735e-01, -2.5552e-02, -8.0084e-02,  1.4896e-01, -2.9722e-01,
        -4.5472e-01,  1.9123e-02, -3.6085e-01, -3.3473e-01, -1.7889e-01,
        -1.1394e-01, -1.5755e-01,  3.0437e-01,  8.6236e-02, -8.1700e-02,
         3.0343e-01, -3.9764e-01, -9.8264e-02, -2.7869e-03,  1.0103e-01,
         2.4907e-01, -1.9637e-01, -1.6457e-01,  1.3185e-01, -1.1286e-01,
        -2.5805e-02,  7.5957e-02,  1.8172e-01,  1.9340e-01,  1.2924e-01,
        -8.3183e-02, -2.2030e-01,  5.4574e-02, -1.4643e-02,  3.0189e-01,
        -1.4699e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3080e-01, -5.5706e+01, -3.4873e+00,  9.7651e-01, -6.5031e-01,
        -1.7863e-01,  1.4688e-01,  3.9249e-01,  1.1618e+00, -9.2874e-01,
         7.2562e-01,  3.5976e-01, -4.6752e-01,  2.4678e-01,  1.0062e-01,
         2.8518e-02, -3.5776e-01,  1.3604e-01,  3.6768e-01, -4.5471e-01,
         4.0238e-01, -2.5239e-01, -5.2421e-01, -3.5956e-01, -1.6466e-01,
         8.1277e-02,  8.0943e-01, -4.5879e-01, -1.0486e+00, -2.1134e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2780e+00, -2.5236e+01, -5.2879e-01, -2.0544e-01,  1.2388e-02,
        -2.3404e+00, -9.0211e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3555, -6.0218,  0.0074, -0.0113, -0.1683, -0.0277, -0.0715, -0.1551,
        -0.1043, -0.3866, -0.3097,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5198, -32.9931,   1.7061,  -1.3707,   0.0824,  -1.4038,   1.0726,
         -0.2404,   0.0470,   0.3835,   0.1590,   0.5711,   0.0590,  -1.2949,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6500e-01, -1.9005e+01,  1.0549e-01, -6.5334e-02, -4.7997e-01,
        -3.2568e-02,  1.8034e-01,  2.1019e-01,  2.2355e-01, -3.4326e-02,
         4.0908e-02, -5.6135e-02,  1.6016e-01, -1.4688e-01,  5.6537e-02,
         1.0464e-03, -9.9926e-02, -1.6334e-01,  1.8551e-01, -3.9375e-01,
        -6.7430e-02, -8.9466e-02, -8.4336e-03, -6.2865e-02,  7.9412e-02,
        -7.2241e-02,  7.4641e-02,  2.2295e-02,  2.0662e-01,  1.0954e-01,
         3.1682e-01, -2.6018e-01,  3.2642e-01, -1.3408e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5188e-01,  3.4072e+01,  3.2558e+00, -2.2998e+00, -5.5517e-01,
        -1.0945e-01,  9.0292e-01,  3.5199e-01, -5.0970e-01,  5.9055e-01,
        -1.9757e-02,  2.7367e-01, -4.9957e-02,  6.1331e-01, -4.1652e-02,
        -5.3179e-01,  7.3195e-01, -9.4168e-01,  4.0147e-02,  6.4914e-01,
        -5.1800e-01,  1.1049e-02, -1.0927e+00,  1.0987e-01, -1.3905e-02,
         6.0164e-02,  1.0367e+00, -2.0481e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5002e-01, -5.6078e+01, -2.9352e+00,  6.3476e-01, -7.1584e-01,
         4.9554e-01,  2.6985e-01, -6.5597e-01,  3.7287e-01, -1.8937e-02,
        -4.7326e-02,  3.5401e-01,  1.2661e+00, -1.7689e+00, -1.3905e+00,
         5.4583e-03,  6.3834e-01, -1.8617e-02, -1.1427e+00,  2.6744e-01,
         9.2906e-02, -1.6255e-01,  8.9213e-01,  2.0209e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.0651e+00,  1.3723e+01,  1.1859e-01, -2.6765e-01, -4.1471e-02,
         1.5005e-01, -3.9563e-01, -3.3806e-02, -6.0848e-02, -1.2057e-02,
        -1.9541e-01, -1.5723e-01,  6.0527e-02, -1.7868e-02, -1.0044e-01,
        -1.5613e-01, -8.4273e-02, -7.4762e-02, -1.5349e-01, -1.5872e-01,
        -3.9968e-02, -7.3318e-02, -1.3097e-01, -6.6987e-02,  5.3106e-02,
         7.9361e-02, -7.2207e-02, -2.7053e-02, -2.6573e-01, -2.8741e-01,
        -4.2433e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -6.3372, -71.0199,   1.0961,   1.9752,  -0.4564,  -0.3041,   0.8715,
          0.3191,  -1.3995,   0.6122,  -0.6609,   1.2985,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.3843, -38.5782,  -0.0541,   3.0891,  -0.1256,   0.0870,   0.6846,
          0.4758,   0.4657,   0.4340,  -0.1988,   2.0136,  -1.4689,   0.3257,
          0.2631,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7876e+00, -2.2478e+01,  1.5312e+00,  2.5046e-01, -3.7011e-01,
        -2.0808e-01,  1.5211e-01,  2.6284e-01, -3.5126e-02, -5.8702e-02,
        -2.9856e-01,  2.9031e-01, -1.9422e-01,  3.2592e-01,  2.3047e-02,
         1.2071e-02, -1.5388e-02, -2.1585e-02, -2.3947e-01,  2.8956e-01,
         2.6779e-02, -1.7197e-01, -6.0912e-02, -1.1675e-02, -8.0065e-02,
         1.2761e-01,  1.0014e-01,  1.3414e-01,  4.4048e-02,  3.5523e-01,
        -4.7694e-02, -1.6889e-01, -6.6929e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1594e-01,  7.2920e+00, -1.8451e-01, -6.0828e-01,  5.8086e-02,
         2.7880e-03, -5.6625e-02,  1.3569e-01, -1.2384e-01, -3.3223e-02,
        -1.3860e-01, -3.5745e-02, -8.7578e-02, -3.7614e-01,  2.3561e-01,
        -2.7386e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.7038, -45.6638,   3.3351,   0.8789,   0.2305,  -0.2065,  -0.4409,
         -0.8018,  -0.6321,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2082, 12.3860, -0.5472, -0.6958, -1.4247,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7938e+00, -7.5450e+01,  1.6677e+00, -1.6658e+00,  1.9689e+00,
        -1.4732e-01,  1.3241e+00,  1.1961e+00,  4.6893e-01,  3.8095e-01,
         1.0641e+00, -1.5069e+00, -4.0124e-01,  6.1762e-01, -4.5662e-01,
        -4.2430e-01, -1.6138e-01,  3.3731e-01, -7.1787e-01,  4.3720e-01,
         3.6447e-01, -2.1574e-01,  4.6969e-01, -1.4680e-01, -5.3515e-02,
        -1.3715e+00,  9.5665e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.8379, -38.9710,  -0.9736,  -0.2637,  -2.2552,  -1.1355,  -0.9874,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7889e+00,  8.5505e+01,  3.3338e+00,  1.1325e+00, -2.2379e+00,
         1.1954e+00,  5.8701e-01, -3.3038e-02, -2.3076e+00,  4.4480e-01,
        -4.5206e-01,  1.5753e-01, -7.9783e-01,  3.3930e-01, -1.6973e-02,
        -2.4806e-01,  1.3373e+00,  7.0378e-01, -2.0221e-02,  1.0293e+00,
         2.5447e-01,  7.7644e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8734, 32.2404,  1.4232,  0.2923, -0.9501,  0.4340,  0.1742, -0.3372,
        -0.5542, -1.3610,  0.1911,  0.1502, -0.2413, -0.0549, -0.5774, -0.1364,
        -0.7157, -0.1452, -0.6413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6553e-01,  4.4283e+01,  5.3677e-01, -5.7633e-01,  3.4309e-01,
        -4.8349e-02, -4.5963e-01,  4.8146e-01,  6.3867e-01,  2.0793e-02,
        -2.2077e-01,  3.0584e-01, -3.6243e-02,  1.6201e-01,  4.7724e-01,
        -1.0319e-01,  5.3611e-01, -4.4338e-02, -2.8798e-03, -5.5979e-01,
         8.7059e-02, -1.7475e-01,  1.8423e-01,  4.1905e-01,  4.2895e-01,
         1.1489e-01, -7.1579e-04,  1.3736e-01, -1.1950e+00, -5.3028e-01,
         5.3586e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.3948e+00, -1.1365e+01, -2.5835e-01,  4.0264e-01, -1.9167e-01,
         3.2881e-02,  6.6800e-02,  1.8798e-01,  1.8286e-01,  5.6238e-02,
        -4.1604e-02, -1.9637e-01, -3.7458e-01, -1.7707e-01,  2.0504e-01,
        -3.3296e-02,  2.1939e-02,  4.5595e-02,  3.9582e-01, -1.5416e-01,
        -2.9650e-02, -7.2666e-02, -6.7259e-02, -1.3678e-01, -9.3797e-02,
         1.6390e-03, -9.7234e-03,  5.4612e-02,  2.2160e-02,  6.2905e-02,
        -2.7333e-01, -4.3757e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2132e-01,  5.8050e+01, -5.3537e-01,  6.8617e-01, -1.9317e+00,
         2.9495e-01,  4.0903e-01,  1.7850e+00,  9.7315e-01,  1.0692e-01,
        -2.3247e-01, -4.5293e-01, -1.2630e+00, -9.7898e-01, -8.8372e-02,
        -5.2293e-01, -1.3461e-01, -5.8908e-01, -5.1601e-02,  5.8798e-01,
        -2.8410e-03,  2.4428e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9663e+00,  4.1721e+01,  8.4812e-01, -6.1168e-01, -1.2192e-02,
        -4.5989e-01, -4.3716e-02,  3.1958e-01, -1.9861e-01, -2.8115e-01,
         6.7105e-01,  2.1252e-01, -5.3877e-02, -3.4167e-01, -3.4106e-01,
        -3.1960e-01, -3.1124e-01,  9.5191e-01, -2.7617e-01, -6.1926e-01,
        -6.2194e-02,  2.3964e-01,  4.1192e-01, -2.1451e-01, -8.6934e-01,
        -2.0632e-01, -3.3368e-01, -4.3940e-01, -6.4423e-01, -9.3140e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2186e+00,  4.7332e+01, -6.9374e-01,  2.3735e-01, -2.6846e-01,
        -7.7180e-01,  3.4257e+00, -1.6731e-01, -2.2999e-01, -2.9550e-01,
        -1.8584e-01,  1.4639e-01, -8.0583e-01, -1.0079e-02, -8.3094e-02,
        -8.7622e-02,  9.5604e-01, -9.8705e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2696e+00, -1.8129e+01, -6.5427e-01, -3.6400e-01,  6.0102e-03,
         7.5470e-01,  7.0101e-02, -1.3753e-01,  4.0200e-02, -1.5779e-03,
         1.2101e-01, -4.3015e-02,  6.0670e-02, -4.2332e-02,  2.7442e-02,
         3.5353e-01, -3.3601e-01, -1.1578e-01,  1.2305e-01, -3.8682e-02,
         9.0124e-02,  1.3850e-01,  6.7386e-02, -1.0429e-01, -1.1945e-01,
         2.3202e-02,  5.7614e-02, -3.7398e-02, -8.6996e-02, -9.9743e-02,
        -1.8052e-03,  6.7620e-02, -8.1716e-02,  9.7753e-02, -2.8225e-02,
         1.2771e-03, -1.0722e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3856e+00,  5.6941e+01, -5.8669e+00, -1.6217e+00, -4.9677e-03,
         1.0169e+00, -6.0180e-01, -1.2677e+00, -3.0039e-02,  7.7801e-01,
        -1.9491e-01, -3.5639e-01,  6.7259e-01,  4.1806e-01, -2.3674e-01,
         1.6172e-01,  6.9853e-01,  4.6592e-01, -2.0579e-01,  3.6886e-02,
         2.6397e-01,  1.3638e-02,  2.9123e-04,  2.3902e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0546e-01,  4.6416e+00,  2.6620e-01,  1.8426e-01,  9.2945e-02,
         1.2893e-01, -6.8631e-02,  1.6338e-02, -1.8993e-02, -2.2183e-02,
         9.8330e-03, -2.3027e-03, -7.7614e-02,  1.3687e-04,  5.7338e-02,
         1.8456e-03, -2.1342e-02,  2.7899e-02,  1.9348e-02,  1.1731e-02,
        -2.3694e-02, -3.8212e-02,  2.0291e-02, -6.2791e-02,  4.1845e-02,
         3.0476e-02,  8.6745e-03, -1.6032e-02, -3.3667e-02,  4.8388e-02,
        -1.1918e-02, -2.9457e-02,  2.2381e-02, -3.9873e-03,  3.2797e-02,
        -1.1712e-02, -5.2312e-03, -5.2101e-03,  1.5260e-02,  1.5575e-02,
        -1.3676e-02, -7.1607e-02, -1.6067e-02,  2.3712e-02,  1.0624e-01,
         3.3084e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6886e-02,  1.6488e+01, -3.9177e-03, -1.0547e-01, -8.7399e-02,
         1.3848e-01, -7.5266e-01, -2.1003e-01, -1.4573e-01, -2.7625e-01,
         1.9884e-01, -1.7159e-01, -4.6815e-02,  3.1250e-02,  2.1863e-02,
         1.3837e-01,  1.3327e-01, -4.2198e-01, -3.3072e-01, -7.0479e-04,
         4.1569e-01,  2.6512e-01, -4.1575e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8294e+00,  7.0202e+01, -9.5563e-01,  1.5673e+00, -1.2516e-01,
         5.8670e-02,  2.3788e+00, -1.6031e+00, -7.2321e-01, -1.6910e+00,
        -2.0272e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7371, 26.7903,  0.1592, -0.9268, -0.1809, -0.1192, -0.5466, -0.3669,
         0.0689,  0.1959,  0.3476, -0.1536,  0.6291, -1.4811,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1692, 16.1129, -0.6280, -0.5693, -0.0829,  0.1256, -0.0640,  0.4283,
         0.0879, -0.2135,  0.6196,  0.1499,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7243e+00,  7.1545e+01,  5.9622e-02,  2.1864e-01,  1.3261e+00,
        -1.7846e+00, -4.4707e-01,  1.4855e+00, -1.0634e-01,  4.2675e-01,
         1.7669e-02, -8.0693e-01, -1.0068e-01,  2.1640e-01, -3.4340e-01,
         1.7597e-01,  7.6442e-02, -1.0739e-01,  5.6777e-01,  5.7687e-01,
         1.6701e-01,  1.6183e-01,  2.1525e-01,  2.3878e-01, -4.6185e-02,
        -1.0639e+00,  2.0593e+00,  8.2286e-02, -1.7343e-01,  2.1075e-01,
         2.4268e-01,  5.2381e-01,  8.4264e-01,  6.3962e-01,  3.0302e-01,
         2.6134e-01,  4.2186e-01, -2.5457e-02, -1.0204e-01,  4.5734e-02,
         3.2505e-01, -3.5515e-02,  1.3274e-01, -8.7864e-02, -2.6093e-01,
        -1.1093e-01,  2.9412e-01, -8.7750e-02,  7.9768e-03,  9.2293e-02,
         7.1175e-02, -1.5011e-01, -4.4701e-01, -2.3563e-01,  1.1895e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 6.4028e-01, -1.0519e+01,  1.0315e-02, -1.1839e-01, -2.0059e-01,
        -2.8829e-01,  2.2032e-02,  1.7770e-01, -9.3941e-02, -7.3283e-02,
         2.5672e-01, -2.2451e-01, -1.0289e-01, -1.6045e-03, -2.4722e-02,
        -1.6925e-01,  8.1283e-02, -2.6163e-01, -2.7062e-02, -1.6952e-01,
         1.7090e-01, -4.0524e-01,  3.7609e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0135e-02,  4.0441e+01, -1.5643e+00,  1.1641e+00,  7.5239e-01,
        -5.2778e-01, -2.4975e-01,  3.0729e-01, -1.1726e+00,  6.2826e-01,
        -1.2059e+00,  6.9340e-03, -1.3694e+00, -1.4555e+00, -1.0230e+00,
         5.2589e-02, -4.7067e-01, -2.3699e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9694e-03, -9.8718e+00,  9.3268e-01, -1.2875e-01, -2.9803e-01,
        -2.9521e-01, -1.1627e+00, -2.6385e-01, -3.6817e-01, -5.5373e-02,
         1.2955e-01, -3.4832e-02,  4.1239e-01,  1.2019e-01,  2.4681e-01,
        -2.5760e-01, -3.1291e-03, -6.6713e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4278e+00, -2.1693e+01,  8.4727e-01,  4.5615e-01, -2.1616e-03,
         8.5824e-02, -7.6394e-01, -3.2736e-01,  5.1942e-01,  6.6921e-02,
         1.8987e-01,  1.1860e-02, -7.3190e-02,  3.2184e-01,  1.3491e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8016e+00, -5.3597e+01, -2.5819e+00, -1.7310e+00, -9.4925e-01,
        -1.9092e-01,  6.3676e-01,  1.4804e+00, -5.8161e-01, -6.2677e-01,
        -7.0525e-01,  1.3853e-01, -6.8536e-02, -1.6505e-01,  7.9759e-01,
        -1.5136e-01, -5.1224e-01, -3.6984e-01, -2.2625e-01, -5.1431e-01,
        -7.5069e-01,  9.8604e-01, -2.9376e-01,  3.3030e-02, -4.3487e-01,
         1.4442e-02, -1.7109e+00, -1.1470e-01, -1.3177e-01, -6.2637e-01,
        -6.9820e-01,  3.9862e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.8552e-02, -9.5588e+00, -3.1532e-01, -2.4177e-01, -2.9609e-01,
        -5.1725e-02, -7.6691e-02, -3.9698e-02,  7.4597e-02,  8.0663e-02,
        -6.3110e-02,  1.1856e-01,  1.3280e-02,  1.4275e-01, -3.0102e-03,
        -2.2634e-02, -1.5915e-02,  1.1592e-01,  3.9716e-02, -5.5831e-02,
         2.1737e-01,  3.2244e-02,  5.2534e-02,  1.0396e-01,  4.1028e-02,
         4.0300e-02, -1.0571e-03, -3.1144e-02,  6.0499e-02,  8.8952e-02,
        -6.5034e-02,  2.1751e-02, -3.5205e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5431e+00, -4.9728e+01, -6.8293e-01, -1.8650e+00,  1.3040e-02,
         1.2688e-01, -2.8087e-01,  3.8698e-01, -3.3011e-01, -1.6002e-01,
         1.4311e-01, -3.2811e-01,  3.9032e-01,  1.4063e-01, -3.3501e-01,
        -4.5605e-01,  5.7377e-02, -4.0306e-02,  3.7578e-01,  3.6281e-01,
         1.5244e-01,  3.9671e-03,  3.1009e-02, -5.2478e-01,  5.6367e-02,
        -7.9120e-02,  5.2643e-02, -2.0464e-01, -3.2294e-01,  4.5750e-01,
        -3.3337e-01, -3.3633e-01, -7.9594e-02, -1.1453e-01, -8.3886e-04,
         3.0406e-01, -2.0002e-01,  3.3943e-02, -1.3785e-01, -4.6074e-01,
        -5.5616e-02, -5.3887e-02, -1.8207e-01,  7.8367e-02, -1.6964e-01,
        -5.1481e-01, -9.2884e-02, -1.8734e-01, -3.6276e-01, -4.5879e-02,
         6.4111e-02, -1.3181e-01, -2.0207e-01, -2.0000e-01, -4.7607e-01,
        -2.5139e-01, -4.1043e-01, -6.9375e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0991, 31.1654,  0.9363,  0.4560,  0.4871, -0.2228,  0.2295, -0.1651,
        -0.5673,  0.3693,  0.2421,  0.3986, -0.4926, -0.1625, -0.1354, -0.1265,
        -0.2092, -0.1046, -0.1586, -0.2099, -0.1392, -0.1194, -0.2021, -0.0878,
         0.0346, -1.3598,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.2905, -50.9009,   2.9350,   1.1611,   0.4856,   0.3600,  -0.3450,
          1.0823,  -0.6666,  -0.9899,  -1.4048,   0.0611,  -0.4399,   0.2732,
         -0.3881,   0.0822,  -0.1867,   0.5624,  -0.6846,  -0.3174,   0.6384,
          0.9374,   1.2118,   1.3783,   0.1835,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4416e-02, -6.3135e+01, -5.6593e+00, -1.9352e-01, -1.7479e+00,
        -4.2455e-01,  3.4590e-02,  5.2649e-01,  1.3900e-01,  2.4586e-02,
        -8.4552e-01,  7.2357e-01, -5.9048e-01,  1.1385e-01,  8.0038e-01,
         5.6679e-02,  6.8032e-01, -1.3590e-01, -7.6997e-02,  1.6345e-01,
         1.1515e-03, -1.2394e+00,  3.3995e-01,  9.9103e-01,  3.3772e-01,
         6.3150e-01,  3.0917e-01,  7.8195e-02,  5.8274e-01,  4.9482e-01,
         8.0135e-02,  3.8655e-01,  5.1441e-01, -3.3043e-02, -8.2910e-01,
         3.5657e-01,  3.7965e-01,  2.6532e-01,  6.6471e-02, -1.5579e-02,
         5.2960e-01, -1.2200e-01,  8.2390e-01,  1.4596e+00,  4.8785e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3748e+00, -5.8650e+01, -1.7100e+00, -1.6669e+00, -4.3048e-01,
        -1.5660e+00, -3.5833e-01, -9.6882e-01,  1.1536e+00,  5.8638e-01,
         2.5842e-01, -1.9046e-01,  7.1538e-01,  1.8717e-01, -6.1801e-02,
         2.0921e-02, -5.5434e-01,  3.0091e-01,  2.4298e-01, -6.5745e-02,
        -1.5683e-01, -1.1836e-01,  3.8033e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  6.2488, -77.3557,   1.2770,   6.1889,   1.1883,   0.9676,   1.5358,
          0.4642,   2.2342,   0.3264,   3.6370,  -6.4516,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.6848e+00,  6.0043e+01,  2.8594e+00, -6.9820e-01, -1.2956e+00,
         1.9187e+00, -1.3709e+00, -4.9941e-01,  2.9879e-01, -1.4047e+00,
        -1.4611e+00,  4.8529e-02, -2.9236e-01,  4.7176e-01,  3.9401e-01,
        -1.3110e+00,  6.8782e-02,  7.7285e-01,  5.2845e-01, -2.3472e-01,
        -8.4711e-01,  4.9476e-01,  1.5173e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0513e+00,  6.3859e+01, -5.8913e-01, -4.3196e-01,  1.3595e+00,
         7.7969e-01,  4.8744e-01, -1.6542e-01, -1.4941e-01,  3.7792e-01,
        -1.4237e-01, -4.5487e-01, -2.4252e-02,  1.7367e-01,  9.7448e-01,
         8.5992e-02, -5.5502e-01, -1.6186e+00, -1.3422e-01,  9.5028e-02,
        -9.0290e-01, -6.0681e-01, -6.4244e-01, -5.4644e-01, -3.3957e-01,
        -1.3283e-03, -5.0296e-01, -1.5386e-01,  1.3934e-01, -1.4214e-01,
        -1.2797e-02,  2.5546e-01, -8.4647e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2106e+00, -6.7822e+01, -3.5252e+00,  2.1660e+00, -2.4361e-02,
         9.2113e-01,  6.1130e-01,  2.4170e-01,  1.0991e+00,  4.6773e-01,
         1.2316e+00, -2.2430e-01, -3.9855e-01, -2.4688e+00, -7.5298e-01,
         6.3396e-01,  7.4734e-01,  5.0792e-01,  2.9671e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8895e+00, -5.4973e+01,  1.3049e+00, -1.7074e+00, -8.6621e-01,
         5.2792e-04, -2.9244e-01,  3.7099e-01,  2.8078e-02,  9.7682e-01,
        -1.3998e+00, -1.4048e+00, -2.7680e+00, -2.2412e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4802e+00,  1.6490e+01,  6.6661e-01, -5.2028e-02, -4.2721e-01,
         5.0334e-01, -1.8554e-01, -8.1432e-02,  1.8415e-01,  1.0660e-01,
         2.6673e-01,  4.4788e-02,  3.3713e-02, -2.0353e-01, -1.7895e-02,
        -1.0265e-01, -2.3789e-01, -3.0022e-03, -1.7541e-01, -1.7120e-01,
         4.2981e-01,  1.0110e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5455e-01,  3.9937e+01, -5.8923e-01,  6.4357e-01,  6.2993e-01,
         1.4907e+00, -7.4795e-01, -1.6275e+00, -4.5404e-03, -5.4926e-01,
         1.8944e-01,  2.0849e-01, -3.9818e-02,  1.0896e-01, -3.3026e-01,
         1.9242e-01,  1.4878e+00,  7.2516e-01,  4.7077e-01,  1.7910e-01,
        -7.1234e-02,  3.8229e-01,  7.2548e-02, -2.7237e-01, -2.8991e-01,
         2.4413e-01, -8.4946e-02,  3.8506e-01,  9.3754e-01,  2.0232e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3150e-02,  8.0834e+00, -3.2398e-01, -2.9049e-01,  3.2946e-01,
         1.0415e-01, -2.6066e-02,  4.7848e-03, -2.3321e-02,  1.4305e-01,
        -1.3456e-01, -1.4376e-02,  6.4501e-02,  9.9396e-02,  8.5061e-03,
         4.5010e-01, -6.5799e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2447,  9.8387,  0.1876, -0.0814, -0.7340,  0.2105, -0.2643, -0.0177,
         0.4556,  0.6647, -0.6069,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3723, -7.9429, -0.1557,  0.1995,  0.1851,  0.2263,  0.0815,  0.1121,
         0.0302, -0.1954,  0.0126,  0.0937,  0.0514, -0.4121, -0.0633,  0.3200,
         0.5319,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.7866, -52.8668,   1.2141,  -3.1115,   0.3261,  -0.3889,  -0.6895,
          0.2367,  -0.0935,   0.5382,  -0.1647,  -0.8149,  -0.0593,  -0.6155,
         -1.2157,  -0.5950,  -0.1513,  -0.1435,   0.0598,  -0.2351,  -0.3034,
         -0.0877,  -1.6722,  -0.5753,  -0.3525,   0.0867,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1522e+00,  1.7207e+01, -1.0217e+00, -3.3014e-01, -2.9650e-01,
         1.3445e-01, -2.9978e-01, -7.4229e-02, -2.4933e-01, -3.2209e-01,
         9.7789e-02, -2.7170e-02,  3.1575e-02,  8.8418e-02,  1.5176e-01,
        -3.1930e-02, -7.1833e-02, -9.1783e-03, -6.6730e-02,  5.5445e-02,
        -7.3306e-02, -1.1978e-01, -9.9656e-02, -5.0764e-02,  1.2463e-02,
        -4.1696e-02,  5.2239e-02,  6.5090e-02,  1.5902e-02,  6.4125e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6362e+00, -5.7395e+01,  3.2052e-01,  1.1759e+00, -1.3860e+00,
         9.1564e-01, -8.1755e-01,  6.2606e-01, -1.4611e-01,  3.5066e-01,
         8.2541e-01, -1.3396e-02, -3.7402e-01,  1.6255e-01,  8.1845e-01,
        -1.6209e-01, -4.3363e-02,  3.6523e-01,  1.2983e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-6.4048e-01,  1.1882e+01, -2.7610e-01,  3.0188e-01,  4.5546e-02,
        -2.5572e-01,  5.2317e-01, -1.5069e-01,  5.4312e-03,  4.5778e-03,
         4.8821e-02,  1.2063e-01,  2.9129e-02, -1.7647e-02, -2.9556e-01,
         2.3971e-01,  4.6305e-02,  3.2284e-01,  1.7984e-01,  7.6714e-02,
        -2.1695e-02,  5.0401e-02,  9.6530e-02, -2.5459e-02, -3.6544e-01,
        -1.2635e-01,  3.7035e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4730e-02, -3.6835e+01,  1.3061e+00,  7.7256e-01,  1.3576e+00,
        -1.1118e+00,  5.1548e-01,  7.6569e-01,  7.8408e-01, -2.9618e-01,
        -8.9716e-01, -1.0162e+00,  4.2767e-01,  6.3477e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9064e-01,  1.8288e+01,  1.0585e+00,  2.4992e-01,  5.8894e-02,
         2.4368e-01,  2.1890e-01, -1.7986e-01,  1.3316e-01, -3.8202e-02,
         2.7819e-01, -1.2883e-02, -1.8872e-02, -1.4196e-01,  2.6416e-01,
         1.0499e-01, -5.8177e-02, -5.4874e-02, -9.6433e-02,  4.7791e-02,
        -6.0982e-03, -1.3173e-01,  5.0316e-02, -1.1412e-01,  2.4919e-01,
        -1.0367e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1696, 41.2034,  0.4381, -0.6297,  0.9618, -1.6520, -0.1556, -0.4649,
        -0.6762, -1.5392, -1.1309,  2.7734,  0.6690, -2.6083,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9184e-01,  1.3212e+01,  7.3389e-01, -3.4754e-01, -1.0611e-01,
         1.1123e-02,  4.1859e-02, -3.3995e-01, -3.5933e-03,  4.1337e-02,
        -1.0437e-01,  4.2056e-02, -5.3644e-02,  3.0984e-01,  2.1886e-01,
         6.2974e-02,  1.9647e-01,  1.7854e-01, -4.2584e-02,  1.1353e-01,
         7.7459e-02,  6.3375e-02,  1.1534e-01,  1.2314e-01,  2.5288e-02,
         3.0663e-03, -2.2032e-01,  2.0140e-01,  5.0051e-01, -2.4369e-02,
         1.6502e-01,  2.8961e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6124e-01,  3.0315e+01,  4.7206e-01, -2.5466e-01,  6.7999e-01,
         1.1389e+00, -7.3117e-01, -3.6653e-01,  4.8381e-02, -3.5078e-01,
         5.5398e-01,  1.3978e-01, -3.5680e-01,  5.1322e-04,  2.7426e-01,
         1.1488e-01,  7.3931e-02, -2.8682e-01, -4.3266e-02,  1.9153e-01,
        -9.5000e-02, -1.7604e-01,  1.8251e-02,  8.8038e-01, -1.1110e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5701, 23.0703,  1.2539,  0.4654,  0.3985, -3.0540,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1896e-01,  4.1202e+00,  8.4356e-02, -4.7505e-02,  9.5176e-02,
        -6.3110e-02, -5.6206e-02, -3.9859e-04,  1.8327e-02, -1.0185e-01,
        -1.7839e-02,  8.8959e-02, -1.6454e-02, -1.4561e-02, -3.2200e-02,
        -4.4695e-02, -2.6979e-02,  3.4032e-02, -8.4292e-02, -3.4016e-02,
        -1.0214e-02, -3.8364e-02, -2.2455e-02, -3.6371e-02, -1.0913e-02,
        -8.5788e-03, -3.4768e-03, -2.0611e-02,  1.7723e-02,  7.5448e-02,
         4.3746e-03,  2.8755e-02,  3.1081e-02,  1.0150e-02,  1.1538e-02,
        -4.3489e-02,  6.2981e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2931e-03, -1.8811e+00,  2.8131e-02, -1.2350e-02, -1.5281e-02,
        -3.1667e-02, -1.6372e-02, -1.0789e-02, -3.5119e-03, -6.3824e-03,
         1.5565e-02, -2.0371e-02, -5.2277e-03,  1.3731e-02,  1.6003e-02,
         1.3656e-02, -8.4184e-03, -2.3494e-04,  1.0702e-03, -7.9706e-04,
         7.6634e-02, -2.0176e-03,  1.8499e-03,  7.1872e-03,  1.5460e-03,
         4.5943e-03, -1.2455e-02,  3.6096e-03,  3.4828e-02, -9.8469e-03,
        -3.3844e-03, -4.6519e-03, -3.8356e-03,  1.3662e-02, -2.8140e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0971,  3.7490, -1.1791,  0.0239,  0.0579,  0.2452,  0.0171,  0.0223,
         0.0130,  0.0275, -0.0393, -0.0792, -0.2643,  0.0615, -0.4024,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0545e+00,  5.4617e+01,  2.4330e-01,  1.2323e-01,  1.6490e+00,
         7.3914e-01,  7.2516e-01, -1.1259e+00,  7.9191e-01,  9.2299e-01,
         2.6954e-01, -3.9516e-01, -6.6012e-02, -1.7063e-01,  3.5822e-01,
        -1.4903e-01,  1.4850e-01, -1.7354e-01, -4.5014e-01,  7.9029e-02,
        -3.7574e-01, -5.8443e-01, -2.0371e-01, -2.3451e-01, -2.4785e-01,
        -3.6615e-01, -1.4289e-01,  7.8723e-02,  3.8445e-02,  6.7387e-02,
         8.8095e-01,  3.4178e-02,  3.6199e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4918e-01, -1.4595e+01,  4.9902e-01,  1.8288e-01, -7.2568e-02,
         4.5481e-01, -3.6494e-01, -1.1836e-02,  5.0448e-02,  3.6176e-02,
        -1.3429e-01, -1.0838e-01,  5.8293e-02,  2.7888e-01,  3.3533e-01,
         4.3042e-02,  3.6255e-01, -1.8775e-01,  2.0991e-01,  8.9725e-02,
         3.0237e-02,  6.7538e-02, -3.9286e-03,  6.1090e-04, -1.3414e-01,
         9.7906e-02,  1.1158e-01, -2.7217e-01,  2.9427e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1039e+00,  1.1489e+01, -7.8461e-03, -3.4206e-01, -9.3872e-02,
        -3.8702e-02,  2.6954e-01, -1.8792e-01,  5.1218e-02, -4.1754e-01,
        -6.5367e-02, -6.9036e-03, -2.2184e-02,  8.1911e-02, -6.4507e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3460e-01,  1.5264e+01, -3.7322e-01, -3.1313e-02, -4.7048e-01,
        -9.2221e-02, -7.1175e-02,  1.5506e-02, -8.3689e-03,  1.4659e-02,
        -2.7940e-01, -7.0600e-02, -4.3595e-02, -1.1352e-01, -1.5974e-02,
        -8.9660e-02, -1.0803e-01, -5.8091e-02,  3.0348e-02, -5.7741e-02,
        -8.8423e-02, -1.1569e-01, -1.1734e-01,  1.0115e-02,  1.9830e-02,
         2.4835e-02, -1.4311e-02, -1.2320e-02, -5.8139e-02, -3.7117e-02,
         1.3081e-02, -2.5493e-02, -2.0289e-02, -6.2713e-02, -3.5940e-02,
         8.3949e-02, -1.0705e-01, -8.7528e-02,  2.4359e-02, -1.2348e-01,
        -6.3728e-02, -1.1475e-01,  6.3527e-02, -6.6730e-02,  3.1922e-02,
        -2.2564e-01, -3.9078e-02, -1.0816e-01, -6.6393e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0923e-01,  1.5042e+01, -6.9271e-01, -3.8236e-02, -2.9364e-02,
        -1.8451e-01, -1.9341e-01,  9.2938e-02, -3.3755e-01, -2.8365e-01,
        -1.3513e-01,  7.6148e-03,  1.7759e-01, -5.1241e-02,  3.8279e-02,
        -3.8224e-02,  6.2070e-01,  1.1862e-01,  1.9381e-01,  1.0366e-01,
         3.1235e-02, -7.9536e-02, -1.0122e-02,  6.8598e-02, -9.8826e-03,
        -4.3224e-02, -2.2890e-01,  2.0584e-01,  1.0360e-01, -1.7161e-02,
        -7.7546e-02, -1.5704e-01,  1.3898e-02,  1.9283e-01, -6.9914e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  4.2103, -48.8123,  -1.1616,   0.5127,  -1.2768,  -0.6718,   0.8342,
         -0.1420,  -0.4430,   0.5529,  -0.7563,  -0.1715,   0.2392,  -0.1273,
         -0.8772,  -1.1097,   0.4485,  -1.1021,  -2.5862,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1687e+00,  9.2803e+01, -3.7060e-01,  5.3434e+00,  3.7084e-01,
         2.4466e+00,  2.4851e+00,  1.1875e+00,  2.2455e+00,  3.6675e-02,
         1.2474e+00, -1.4011e-01, -9.1191e-01, -5.2626e+00, -1.1307e+01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2797e+00,  6.3467e+01, -7.6995e-01,  1.3187e+00, -1.7240e-01,
        -2.0379e-01, -1.2022e+00,  2.3771e-01,  8.0738e-01, -8.1835e-01,
        -3.6503e-01, -1.3356e-01, -4.4767e-01, -3.0318e-02, -9.0117e-01,
         4.0505e-02, -2.8707e-01, -5.6807e-01, -2.7006e-01, -1.4941e-01,
        -1.1721e+00,  1.1397e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0907e+00, -1.9102e+01,  9.1066e-01, -3.7172e-01, -5.4636e-01,
        -3.9624e-02,  2.7409e-01,  1.6772e-01, -1.0485e-02, -1.3914e-01,
        -4.7661e-02,  1.3456e-01, -1.1472e-01,  2.0200e-01,  6.2499e-02,
         5.0872e-03, -1.1360e-01,  7.8888e-02,  2.5197e-02, -2.0395e-01,
        -1.5381e-01, -1.1075e-01,  1.1006e-01,  1.0855e-01,  2.4630e-01,
        -6.4815e-02, -3.4319e-02, -1.2963e-01, -3.5737e-01, -4.1068e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.6858, -52.2479,   1.9317,   3.1291,   4.7046,   1.1066,   0.1172,
          1.6033,   1.1849,  -2.2783,  -1.1671,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6994e+00,  6.9710e+01, -2.1734e+00, -9.1275e-01,  2.6466e-02,
        -1.5955e+00, -4.8145e-01,  3.6090e-02,  2.1567e-01,  1.1142e+00,
        -1.4423e+00,  3.6141e-01,  3.2315e-02, -1.3456e-01, -6.9663e-02,
         1.3602e-01,  8.8872e-02, -4.2488e-01,  2.5248e-02, -1.0127e-01,
        -2.4403e-01, -4.9181e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7491e-01,  6.2651e+01,  5.0911e-01, -1.4382e+00,  2.1190e+00,
         4.0804e+00,  6.0108e-01,  3.6271e-01, -3.9881e-01, -5.2692e-02,
        -7.7228e-01, -1.2821e+00,  4.2323e-01, -7.5239e-01,  7.1725e-03,
        -2.6874e-01, -5.6133e-01, -3.6230e-01,  7.1702e-01, -9.8164e-02,
         7.1057e-02, -1.3524e-01,  7.3065e-01, -1.2882e-01, -3.4417e-02,
        -1.1867e-01,  1.7190e-01, -4.6352e-01, -6.2546e-01,  2.1451e-01,
        -3.6769e-03,  8.0056e-01,  8.3816e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1960e-01, -3.0522e+01, -1.6992e+00,  2.1485e+00,  3.3415e-02,
        -5.7382e-01, -1.1899e-01, -4.1970e-02,  1.1369e-01,  3.0734e-01,
        -1.1914e-01, -2.2397e-01,  1.5102e-01,  2.8490e-01, -4.2288e-01,
         2.6076e-01, -7.2454e-02,  1.5933e-01,  1.6063e-01,  1.7266e-01,
         2.3045e-01,  5.9944e-01, -3.7325e-01,  3.1527e-01, -6.5658e-01,
         2.7890e-01, -3.0377e-02,  2.5466e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1136e-01, -6.3114e+00, -2.2762e-01, -4.0984e-02, -2.3426e-01,
        -8.1125e-02, -1.4091e-02, -7.2493e-02,  8.0831e-02, -1.8056e-02,
         7.4525e-02, -2.1379e-02, -3.6340e-02,  4.8228e-02,  4.5819e-02,
        -3.7105e-03, -3.4777e-02,  2.0670e-02,  2.3511e-02, -8.4207e-02,
        -1.1088e-02,  8.8445e-04,  5.7490e-02, -3.4052e-02,  6.9505e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.4967e+00, -2.3648e+01,  1.8758e+00, -5.7025e-01, -7.3545e-01,
        -3.6631e-01,  3.0053e-02,  2.5678e-01, -2.8693e-01,  1.0821e-01,
        -9.0022e-02, -1.8003e-02,  1.6483e-01,  2.7034e-01,  5.0324e-01,
         1.4071e+00,  5.4143e-01,  3.0228e-01,  2.6158e-01, -2.7976e-01,
         1.8767e-01,  4.6502e-01,  8.4385e-01, -7.3430e-02,  5.4269e-01,
         4.4047e-01,  2.3747e-01, -1.4852e-02,  3.0646e-01,  2.2899e-01,
         7.6405e-02,  2.0990e-01,  2.6990e-01,  9.1732e-02,  1.4938e-01,
        -2.0414e-01,  1.6322e-01,  1.3883e-01, -9.9432e-02,  1.1882e+00,
         9.3000e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0405e+00, -4.9748e+01, -3.1850e+00,  7.8406e-01, -1.0646e+00,
        -2.0945e-01,  5.3828e-01, -5.8296e-01,  6.1992e-01,  2.2042e-01,
         6.1960e-01,  4.1519e-01, -3.1190e-01,  1.6790e-01,  1.9932e-01,
         7.4350e-01,  1.2840e-01,  3.6518e-01, -4.4830e-02, -1.2740e+00,
         1.7406e-01,  9.5418e-01,  7.2854e-02,  1.2092e+00,  2.9944e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1664e-01, -3.2728e+01, -2.6320e+00,  1.6532e+00, -7.2430e-01,
        -1.3392e-01,  2.0374e-01, -5.6378e-01,  1.8754e-01,  3.7801e-01,
        -1.9249e-02, -3.8585e-01,  2.5776e-01,  1.5097e-01,  5.4735e-01,
         3.2446e-01, -4.1944e-02,  2.1627e-01,  1.1758e+00,  1.6126e-01,
        -3.4223e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.5807, -55.2251,  -3.5552,  -0.0816,   0.4432,  -0.5594,  -0.2304,
          1.7145,  -0.6032,   0.8730,   1.0494,   0.7090,  -0.8189,   0.6194,
          1.8182,   0.6390,   1.3686,   1.0133,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6496e-01,  3.1916e+01, -7.4422e-01,  1.1255e+00,  1.1906e-01,
        -5.1333e-01, -3.1757e-01, -2.0686e-01, -2.5932e-01, -2.5168e-01,
        -1.9370e-01, -8.7444e-02, -2.7368e-01,  2.7461e-02, -3.7897e-02,
         2.6803e-01, -2.3712e-01, -2.5309e-01,  1.1192e-01, -6.4580e-02,
         9.7754e-02,  1.1974e-01,  4.0367e-02, -1.2602e-01,  6.1365e-01,
         5.3491e-02,  2.0157e-02, -2.5176e-02,  8.7861e-02, -3.0925e-01,
         6.8477e-02,  7.3238e-02, -1.5917e-01,  2.2137e-01, -3.6142e-01,
         2.3512e-02,  4.8899e-02,  2.4490e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7618e+00,  4.9040e+01,  3.1572e-01, -9.1241e-01,  3.8877e+00,
         1.1003e+00, -5.2082e-01,  3.6404e-01,  1.3937e+00, -3.4611e-01,
         9.0161e-01,  5.1711e-01,  3.6809e-01, -1.7925e-02,  2.6670e+00,
         8.9763e-01, -4.6663e-01,  4.4460e-01,  2.7307e-01, -4.2503e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.5613, 101.4663,  -1.0651,   2.3314,   1.7244,   2.3432,  -1.1240,
         -0.8679,  -0.8374,   0.1502,   0.9009,  -1.0551,  -2.7322,  -2.7075,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0100e-01,  1.9506e+01, -2.6077e-01, -6.1153e-01, -6.3129e-01,
        -1.6797e-01, -5.5169e-01, -2.0703e-01,  2.0683e-02, -1.4734e-02,
         5.0616e-02, -3.5240e-01, -1.0523e-01, -2.4125e-02,  1.2563e-01,
         3.0114e-02,  1.3298e-01,  1.1572e-01,  8.4891e-02,  6.0312e-02,
         1.7847e-01,  5.6383e-01,  1.1936e-01,  3.6205e-03,  1.1157e-01,
        -4.4350e-01, -4.9026e-02, -1.0767e-01, -9.2445e-02,  1.0447e-01,
         4.5228e-02, -2.1277e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.6306, -23.2641,  -0.5153,  -1.4388,  -0.1499,   0.3393,  -0.2192,
          0.3747,   0.2889,  -0.1421,  -0.0610,   0.0259,   0.2340,  -0.1233,
         -0.1538,  -0.0544,  -0.0482,  -0.2402,  -0.0883,   0.4379,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8776e-01,  5.8474e+00, -1.2110e-01, -4.3927e-02,  8.3752e-02,
         9.1306e-03,  7.5992e-03, -4.9485e-02, -6.9455e-02,  3.1326e-02,
         2.0097e-02, -8.7186e-03,  4.4855e-03,  1.2797e-02, -7.7389e-03,
         9.5666e-03, -7.2857e-02, -4.6877e-02, -5.3608e-02, -1.4020e-02,
         4.3681e-02,  1.0591e-02,  4.3660e-02, -3.4228e-02,  1.6343e-01,
        -4.9755e-02, -6.2682e-02,  5.3678e-03,  5.5863e-02,  7.9644e-03,
         3.2963e-02, -7.5159e-05,  1.2956e-01,  7.1301e-02,  4.2128e-02,
         4.4756e-02, -4.5988e-02,  1.4041e-03, -9.3394e-04,  1.2156e-02,
         1.8750e-02, -4.3679e-02, -2.7552e-02, -2.6926e-02,  1.7549e-03,
         1.1169e-01, -4.9934e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0114, 75.8831, -0.5320, -2.0095,  2.1280, -0.5149, -0.5680,  0.1975,
        -2.4975,  0.1694, -1.2736,  0.3582, -0.4844, -0.7999, -4.0201,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5911e-01,  9.5772e+01,  1.3364e+01,  1.3325e+00,  1.0005e+00,
         1.6682e+00, -8.4746e-01,  3.4416e-01, -6.4402e-01, -4.2364e-01,
         4.7698e-02, -7.3852e-01, -8.2902e-01,  4.9244e-01, -2.2119e+00,
         1.4214e+00, -2.9954e-02, -8.1660e-01, -9.1798e-01, -8.5746e-01,
         5.6115e-01, -6.4376e-01, -4.0721e-01, -3.7519e-01, -3.0568e-01,
         1.2042e-01, -1.5984e-01,  4.7975e+00,  5.6472e-02, -1.3957e-03,
        -3.2669e-01,  3.5054e-01,  1.8529e-01, -1.7654e-01, -2.7506e-01,
        -3.0080e-01, -1.9155e-01,  1.1042e-01, -1.4710e-01, -2.6417e-01,
         2.8025e-01,  4.4924e-01, -3.2341e-01, -1.5488e-01, -2.8200e-01,
        -4.4762e-02, -1.8525e+00, -5.8864e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 2.5157e-01,  1.7142e+01,  8.2526e-02, -2.8025e-01,  1.3247e-01,
        -1.9245e-01,  2.2298e-02, -2.2655e-02,  1.3184e-01,  6.9494e-02,
         1.4082e-01,  1.1888e-02, -2.8184e-02, -1.2008e-01, -2.5173e-02,
         2.5726e-02, -3.0562e-01, -8.1856e-02, -7.5387e-02, -2.2115e-01,
        -8.8301e-02, -1.3306e-01,  2.4120e-02,  5.0154e-03,  5.5551e-01,
        -5.1972e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.0654, -39.3240,  -2.8714,   1.0708,   0.6671,  -0.3930,   0.1527,
          0.4891,  -0.0442,  -0.4282,  -1.3708,   0.2606,   0.1346,  -0.5912,
         -0.1021,   0.2782,   0.0804,   0.5130,  -0.3020,  -0.6283,   0.8013,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1554e-01,  1.5127e+01,  6.4744e-01, -7.4022e-03, -3.9971e-01,
        -3.0976e-01, -7.5410e-02, -9.7240e-02,  3.9724e-03, -2.3529e-03,
         6.0846e-02,  8.7318e-02,  1.1390e-02,  5.6906e-02, -7.7224e-02,
         6.3385e-02,  1.2594e-01, -2.4127e-02, -1.1215e-01, -3.1801e-01,
        -5.0044e-02,  1.6204e-01,  4.3023e-02, -9.6551e-03,  2.5565e-01,
         1.9108e-02, -3.2902e-02,  5.3058e-03,  3.3383e-02, -7.4755e-02,
        -9.2097e-02,  7.2435e-02,  1.0419e-01,  1.0309e-01, -1.9190e-02,
         1.4571e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3228e+00,  2.7202e+01, -2.0074e+00,  1.3146e+00,  8.0266e-01,
        -2.6673e-01, -3.9866e-01, -5.8693e-01,  2.2525e-02,  1.8809e-01,
         7.4315e-01, -3.0011e-01, -2.5749e-01, -1.9955e-01,  3.2970e-01,
         2.3540e-01, -5.0220e-02, -3.9798e-01,  1.6021e+00, -2.1524e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9473e-01,  6.1762e+01, -4.0100e-01,  1.9987e+00,  5.1266e+00,
         7.1613e-01, -1.6510e-01,  1.7350e-01,  1.0931e+00, -4.5472e-01,
         5.4128e-01,  5.1606e-01, -5.4164e-01,  9.4450e-01,  5.1998e-01,
         3.3769e-01, -2.2917e-02, -1.5626e-01, -3.2013e-02,  2.8522e-01,
         2.4868e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7039, 67.0933,  0.4069, -2.3601, -1.4741, -0.4899,  0.0955,  1.2558,
         3.4190, -1.3110,  0.3622,  0.1751,  0.1425,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2655e+00,  6.5468e+01,  2.6571e+00, -1.4197e+00,  5.2295e-01,
        -1.4871e-01, -1.7588e+00, -1.1372e-01, -3.3415e-01,  4.5179e-01,
        -5.0200e-01, -5.7941e-01, -4.2022e-01, -6.9915e-01,  1.0526e+00,
         2.9626e-01, -5.9329e-01,  1.8045e-01, -3.6352e-02, -3.0382e-01,
         3.5800e-01,  9.5354e-02,  2.4625e-01, -3.2026e-01,  2.6833e-01,
         1.3794e-01, -1.5632e+00,  5.3719e-01, -4.0376e-01,  3.8524e-02,
        -7.0955e-02,  5.8709e-01,  1.4134e-01, -1.1417e-01,  1.1115e-02,
        -3.2274e-01, -8.7619e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9440e-01,  8.4020e+01,  3.9794e+00,  6.9513e-01,  2.4065e-01,
        -5.6618e-01,  6.3041e-01,  8.7468e-01, -7.8262e-01, -1.5775e+00,
        -1.8261e+00, -1.1121e+00, -5.5165e-03,  2.0251e-01, -5.4206e-01,
         7.3217e-01,  8.0810e-01,  5.1220e-01, -2.3988e-01,  8.4063e-01,
        -4.1410e-01, -6.9187e-01, -1.1113e-01,  9.0722e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2554e+00,  1.9585e+01,  1.8482e+00,  3.8494e-01, -4.8536e-01,
        -1.4668e-01, -3.7873e-01,  4.9773e-01,  4.8065e-02,  5.5780e-02,
        -6.6357e-01,  2.5092e-01,  5.5838e-01,  2.3269e-01,  1.5295e-01,
        -3.4809e-01, -1.9835e-01,  2.0982e-01,  2.6038e-01,  1.3946e-01,
        -4.5097e-02, -3.2811e-01, -2.3484e-01, -1.2829e-02, -6.8385e-02,
         1.6341e-02,  2.2474e-01, -2.5349e-01,  1.6844e-01,  2.1481e-01,
        -4.7978e-01,  2.1221e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6552e-01,  1.1467e+01, -5.0527e-01, -1.4476e-01,  7.4411e-02,
        -3.5563e-01,  5.9637e-02,  9.7773e-02,  2.4761e-01, -1.4370e-01,
         3.0216e-02, -1.8733e-02, -2.4223e-02,  6.1821e-02, -1.2120e-02,
         4.0625e-02, -9.7250e-03, -2.2839e-02,  9.9443e-03,  1.4199e-01,
        -1.3569e-01,  9.7456e-02, -5.9708e-02, -8.2344e-02, -1.0984e-02,
         5.0322e-02,  1.1855e-01, -6.6003e-02, -9.4217e-02,  1.5032e-01,
        -1.6415e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2269e-01,  9.6597e+00, -1.2509e+00,  3.0309e-02, -9.4864e-02,
        -5.7456e-02,  1.1453e-01, -1.0556e-01,  3.2320e-02, -8.1725e-02,
         2.8264e-01,  9.1789e-02,  1.6903e-01, -2.0266e-01,  9.6560e-02,
        -4.1657e-02, -1.6966e-02,  8.6657e-03,  5.2628e-02,  4.8748e-02,
        -7.0077e-02, -1.4415e-01,  1.1350e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0848e+00,  5.1347e+01,  2.6738e-01, -9.6351e-01, -8.3290e-01,
        -8.5915e-02, -4.4462e-01, -3.4909e-01, -2.2126e-01, -7.8651e-02,
        -6.6812e-01,  4.9013e-01, -2.9382e-01, -2.1867e+00,  4.6618e-02,
        -9.7400e-02, -3.0028e-01, -5.8246e-01, -3.1365e-01, -8.0118e-02,
        -9.0639e-01, -4.1801e-01,  2.9835e-01, -2.4716e-01,  8.7824e-03,
         2.5704e-01,  2.6353e-01, -1.3057e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.3853e+00,  3.1486e+01, -9.7123e-01,  1.1370e-01,  8.4412e-02,
         6.2330e-01,  3.1012e-01, -1.0718e+00, -1.8540e-01, -6.5050e-01,
         2.5676e-01, -2.2349e-01,  3.4186e-02,  3.9972e-02,  2.9443e-01,
         2.9686e-01,  6.4318e-03, -7.3176e-02, -1.2122e-01,  2.3540e-02,
         6.3154e-01,  3.0376e-01, -4.2352e-01,  7.2949e-02, -5.1090e-02,
         1.1080e-01,  4.2464e-01,  4.0137e-01,  2.5882e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3789e-01, -1.5325e+01,  1.0289e+00, -3.5696e-01, -2.0862e-01,
        -2.6245e-01, -1.8973e-02,  1.4185e-01, -4.7071e-02, -2.4141e-01,
        -9.0655e-02, -2.8099e-02,  1.2695e-01, -1.3827e-01,  6.8208e-02,
         6.6221e-02, -1.0601e-01,  9.1322e-03,  1.6710e-01,  2.8715e-01,
         6.3291e-02, -1.3112e-01, -4.3500e-02, -5.3397e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2225e+00, -4.9998e+01, -2.8113e+00,  1.1876e+00, -2.7527e+00,
        -1.3238e-01,  1.8356e-01, -6.7321e-02, -6.6414e-01,  6.8169e-01,
         1.0498e-01, -8.0565e-02,  6.9829e-01,  6.0148e-01, -6.4725e-01,
         5.3788e-01,  3.6711e-01, -7.8135e-02,  1.6589e+00, -6.4115e-01,
         1.0150e+00, -4.5038e-02,  1.9744e-01,  3.3816e-02,  4.8450e-02,
        -1.5122e-01,  1.3774e-01,  2.1851e-01,  5.9910e-02,  1.0654e-01,
         6.6591e-02, -2.3103e-01,  9.7700e-01,  5.6215e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7477e-02, -2.6816e+01,  5.6236e-01,  1.2166e-01,  1.0324e-02,
        -5.6864e-01, -3.3497e-01, -2.6073e-01, -2.8752e-01, -5.5940e-01,
        -4.0373e-02, -1.6594e-01, -1.0151e-01,  6.1660e-02, -5.9216e-01,
        -3.6434e-01, -3.6596e-01, -1.7170e-01, -1.1030e-01, -4.5596e-01,
         1.2990e-01,  2.6313e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.9330, -26.2179,   0.4993,  -0.5713,  -0.2900,   2.7252,   0.2758,
         -0.6303,  -0.2439,   0.4093,  -0.5225,   0.1803,   0.0656,  -0.0908,
         -0.0860,   0.3938,   1.2455,  -0.0437,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4805e+00, -7.6865e+01, -1.6062e+00, -2.0018e+00, -1.3186e+00,
        -8.2780e-01, -9.1707e-01, -2.2285e+00,  4.4621e-01, -1.0603e+00,
        -1.1028e+00, -5.4598e-01, -1.6501e-01,  7.8033e-01,  4.9796e-01,
        -6.3457e-01,  4.7272e-02,  3.3202e-01,  4.0016e-01,  8.2954e-01,
         5.4367e-02, -6.7396e+00,  4.4207e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6380e+00, -6.7511e+01,  1.9943e+00,  4.1052e-01,  1.6475e+00,
        -5.8751e-01, -4.9624e-01, -7.2579e-03,  6.2280e-01, -5.5493e-01,
        -1.1354e-01,  9.7386e-01, -3.0358e-01, -6.0898e-01, -3.9699e-01,
        -4.4180e-01, -9.7810e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 10.4054, -83.2341,  -1.1611,   0.7053,   0.7786,  -0.5939,  -1.9703,
         -1.8534,   0.9734,  -3.2368,  -0.5659,   0.0995,  -1.4906,  -0.3726,
         -1.1792,  -1.4527,  -1.4709,   4.2790,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.1096, -53.5048,   0.2450,   0.5270,   0.5016,   0.4478,   0.0614,
         -0.1443,   0.3160,  -2.8519,   0.0794,   0.4897,   0.2946,  -0.4883,
          0.1901,  -0.4938,   1.0568,  -0.9113,  -0.8826,  -0.4136,  -0.5314,
          0.0750,   1.1473,   0.4449,   0.0824,   1.8394,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7831e+00,  1.5554e+01,  6.7349e-02, -2.5820e-01,  3.4765e-02,
        -3.5692e-01, -1.8420e-01, -1.3558e-01,  3.4333e-03, -1.3144e-01,
        -1.2299e-01,  2.4218e-01, -1.5854e-01,  1.2834e-01, -3.8759e-02,
        -4.6915e-02,  7.4118e-02,  1.0760e-01,  1.4419e-01, -1.1620e-01,
         2.0027e-01,  7.2774e-02,  1.2940e-01,  1.0387e-01, -4.2452e-02,
         3.8799e-02, -7.3921e-02, -1.7917e-02,  8.7135e-02,  3.3732e-01,
        -2.1836e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1076e-01, -1.4903e+01, -7.8536e-01,  3.0152e-01, -1.7044e-01,
        -1.3395e-01, -8.7539e-03, -1.2138e-01,  3.3971e-01,  1.3477e-01,
         3.3072e-01,  1.3357e-01,  3.3758e-02,  5.1971e-02,  1.7020e-01,
        -1.2413e-01, -5.4673e-01,  5.0434e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.2707, -67.7791,   7.6280,   3.7334,   1.2581,  -1.8284,   0.1672,
          1.2149,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 9.3795e-01,  7.9905e+00,  3.1235e-01, -1.9537e-01, -2.9051e-01,
         1.9730e-01,  7.3993e-02,  8.8110e-02,  1.1750e-01,  2.0427e-01,
         4.8179e-02, -4.1852e-03,  1.2188e-02,  1.2830e-02, -5.9863e-02,
         2.1064e-01,  7.9729e-02,  2.4502e-02, -7.4008e-03,  1.2112e-01,
        -1.6817e-02, -6.8939e-02, -1.6720e-02, -3.8159e-02, -9.1998e-02,
         6.9106e-03, -7.9541e-02,  3.1284e-02,  4.9173e-02,  2.4578e-02,
        -2.9871e-02,  9.4040e-03,  2.5363e-03,  4.3351e-02, -8.4916e-03,
        -4.5434e-02,  8.5799e-02,  2.5865e-02, -3.1105e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1091e+00,  6.8291e+01, -3.9497e+00,  3.1068e-01,  7.8545e-01,
         8.6340e-01, -2.6843e-02, -2.3492e-02,  1.4377e-01, -5.3358e-01,
        -2.7206e-01,  3.2468e-01,  1.0650e+00,  1.8255e-01,  5.7423e-01,
         1.1019e+00, -7.8131e-01, -2.9728e-01,  5.0268e-01,  3.0240e-01,
         8.6380e-02, -7.0947e-01, -2.7223e-02, -1.9601e-01,  4.6827e-01,
        -5.9833e-01, -1.5137e+00, -2.1892e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.0283, -18.1442,  -0.2474,   0.1749,  -0.5029,  -0.2246,   0.2308,
         -0.4111,  -0.1854,  -0.6473,  -0.3358,  -0.5270,  -0.0207,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.8507, -25.1827,   1.1891,  -0.1889,  -0.0449,   0.3488,   0.3866,
         -0.0639,  -0.0592,   0.5320,  -0.1033,   0.6255,  -0.3068,  -0.6999,
          1.3246,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3564e-01,  7.2324e+00, -5.7181e-01,  3.7980e-01,  2.8464e-01,
        -1.4418e-01,  8.0304e-02, -6.5724e-02, -5.4413e-02, -3.5815e-02,
        -8.1475e-02, -2.6789e-02,  1.9108e-01, -3.4433e-03,  1.6448e-01,
        -3.1979e-02, -4.5167e-02, -1.5784e-02,  2.4222e-02, -6.6390e-02,
        -7.7242e-02, -2.4012e-02, -7.4420e-03, -7.6577e-02,  2.5816e-02,
        -5.0145e-02,  1.8704e-02,  3.7212e-02, -2.3949e-02,  1.1184e-02,
        -4.8730e-02,  2.0330e-02, -2.3192e-02,  6.8843e-02,  1.1315e-01,
         4.7870e-02, -4.7235e-02, -4.7608e-02, -4.8546e-02, -1.7541e-02,
        -2.7088e-02, -8.9867e-02,  2.7988e-02, -2.2585e-02, -1.2516e-02,
         5.3587e-02, -3.1013e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3957e-01, -2.8824e+01, -1.2412e+00,  2.4330e-01,  1.6996e-01,
         1.7044e-01,  2.5618e-01, -3.9656e-01, -2.2921e-01,  6.2666e-02,
         6.8784e-03, -4.5471e-01, -7.8715e-02,  2.0867e-01, -1.8912e-02,
         2.1947e-01, -3.1062e-01, -2.9938e-01, -5.1393e-02, -3.4410e-02,
        -1.9882e-01,  1.7659e-01, -8.5487e-02, -1.1183e-01, -1.4638e-01,
         2.7556e-01, -7.4147e-01,  2.0919e-02, -2.2534e-01, -1.2450e-01,
        -1.5700e-01, -6.2333e-03, -2.9709e-02, -3.6892e-03,  6.8134e-02,
        -1.0176e-01,  1.2430e-01, -1.0596e-01,  1.6609e-01,  3.7465e-01,
         8.1993e-02, -9.0081e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5601, 27.8923,  1.0487, -0.2907,  0.6957,  0.6699,  1.4134, -0.0341,
         0.2003,  0.6259,  0.1981,  0.2175, -0.2992, -0.3326,  0.4774, -0.7660,
         1.5311,  0.0417,  0.0699,  0.0995, -0.1767, -0.0585, -0.0668,  0.1564,
        -0.5771,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2085,  2.6292,  0.1337, -0.0448, -0.0384, -0.0410,  0.0161, -0.0088,
        -0.0119,  0.0130, -0.0028, -0.0098,  0.0225, -0.0623, -0.0346,  0.0309,
         0.0320,  0.0113,  0.0177,  0.0289,  0.0323,  0.0307, -0.0823,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9182e-01,  6.2862e+00,  2.8920e-01,  3.8518e-02, -7.0719e-02,
         3.7376e-02,  1.6039e-02,  4.5388e-03, -3.0071e-01,  4.0634e-02,
         7.1131e-02, -1.0276e-01,  7.7735e-02, -9.6193e-02, -7.6264e-02,
        -4.5981e-02, -4.0725e-03, -2.8430e-02,  2.5524e-02, -5.5160e-02,
        -6.6646e-02,  8.4550e-02,  6.6152e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1888,  6.9275, -0.2711, -0.1295, -0.2746,  0.0405, -0.0859,  0.1623,
         0.0263, -0.2425,  0.1371, -0.4628, -0.2139,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.3970, -72.9776,  -2.0073,   1.6466,   0.6824,  -0.4972,   1.5473,
          0.9427,  -0.5077,   1.1375,  -0.1801,  -0.8567,  -0.0784,  -1.2495,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9730, 47.3748, -0.4227,  0.5605, -0.4232,  0.1125, -0.1557, -0.4818,
        -0.4721, -0.5309, -0.3683, -0.4818, -0.4474,  0.1753, -0.1329,  0.4379,
        -0.9078,  0.5216,  0.1481,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 4.3423e-01,  5.4997e+00, -3.3259e-01, -2.1645e-01,  1.1117e-03,
         1.6137e-01,  1.8505e-02,  1.6628e-01, -3.3840e-02,  1.2501e-01,
        -1.9255e-02,  9.1428e-03, -2.8824e-02,  1.0674e-01,  7.5306e-02,
        -1.3464e-01,  2.9649e-02,  1.6632e-02, -6.3974e-02,  5.2250e-02,
        -5.6523e-02, -1.3124e-02, -2.9422e-02,  4.0607e-02, -5.2649e-02,
        -4.4457e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1633e+00, -3.2169e+01,  9.4122e-01,  5.9459e-01, -1.1037e-01,
         7.4495e-01,  2.3855e-01,  5.3900e-01,  9.7399e-02,  1.4351e+00,
        -1.5088e-01, -7.0600e-01, -7.1660e-01, -6.4338e-01,  1.0403e-01,
        -2.6864e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7076, 56.5632,  0.3152,  1.2703,  1.1104,  1.3042, -0.6971,  1.1858,
         1.1260,  0.8801, -0.1515,  0.0658, -0.1497, -0.9041, -0.3983,  2.6712,
        -0.2569,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9989, 57.5804, -0.2196,  0.6233, -1.1469,  1.2590,  0.4958,  1.4162,
         2.1199,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0284e-04,  9.7485e+00, -3.4378e-01,  2.0360e-02,  3.3152e-02,
         3.7891e-02,  3.9894e-03,  1.2136e-01,  3.6390e-02,  5.5682e-02,
         4.1931e-01, -2.0087e-02,  1.2130e-01, -2.0348e-01,  5.4948e-02,
         4.9974e-02,  2.1383e-02,  2.8334e-02,  1.2051e-01,  1.7146e-01,
         7.8386e-02,  4.8103e-02, -1.3169e-01, -6.3116e-02,  5.2695e-02,
         5.9981e-02, -1.3687e-01, -4.9301e-02, -7.9842e-03, -9.4738e-02,
        -9.9321e-02,  2.4629e-03, -3.5590e-02,  6.0888e-02, -1.2532e-02,
        -1.7137e-01, -5.4645e-02,  1.6724e-01, -9.1894e-02, -6.9659e-02,
        -3.0843e-02,  8.5332e-03, -1.9159e-01,  2.1807e-01, -4.1168e-02,
         9.3227e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9093e-01,  1.6632e+01, -2.7025e-01,  1.9401e-01,  2.2195e-01,
        -2.7041e-02,  2.0439e-01,  2.8525e-02, -4.0077e-02,  9.7743e-02,
        -4.3383e-02, -1.5916e-02, -8.5340e-02,  9.2507e-02, -1.1408e-01,
         2.0275e-01,  3.4284e-02,  2.9175e-01, -2.8526e-02,  1.1786e-01,
         8.3467e-02,  5.7580e-02, -1.1651e-01,  5.6560e-03, -7.8510e-02,
        -3.1914e-02,  4.2777e-02, -3.1960e-02,  2.6434e-01,  2.0849e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.2610, -47.8154,  -1.5426,   2.0756,   0.0861,  -0.4080,   1.7050,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2675e+00, -3.3762e+01,  1.1354e+00,  7.7338e-01, -1.4740e+00,
         5.3523e-01,  3.9292e-03, -5.8878e-01,  9.8692e-01,  8.2706e-01,
         4.8520e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.2740, -70.1659,  -1.2176,  -1.1469,   0.3133,  -2.9755,   0.4384,
         -1.2540,  -1.3911,  -0.3588,   0.1929,  -0.7818,  -2.9409,   0.1575,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8933e+00, -5.8799e+01,  3.3150e-01, -4.0307e-01,  4.7495e-01,
        -1.5279e-01,  2.3277e-02,  1.2406e-01,  3.6693e-01,  7.1800e-01,
        -6.9782e-02,  1.8593e-01,  7.3825e-01,  1.4182e+00,  2.4374e-01,
        -1.8262e-01,  2.6015e-02,  1.4877e-01, -4.6222e-01, -1.1498e+00,
        -2.4300e-01, -9.2370e-03,  2.5808e-01,  7.8995e-02,  3.3390e-01,
        -3.6900e-01,  8.2163e-01, -9.4568e-01,  2.8409e-02,  2.9693e-01,
         1.2140e+00, -4.2562e-01, -9.8996e-01, -1.3314e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5796e-01,  3.2714e+01,  1.4427e+00,  5.0439e-01,  2.6249e-01,
        -2.2206e-02,  6.8251e-01,  3.0005e-01, -3.4345e-01,  3.7530e-01,
        -1.0812e-01, -5.3391e-01, -5.3383e-02,  5.5541e-01,  1.3132e+00,
         4.8065e-02, -1.0537e+00, -6.7849e-01,  3.4796e-02, -6.6573e-02,
        -4.1127e-01,  6.4422e-02, -7.6505e-01,  1.3989e-01, -1.3814e-01,
        -2.8953e-01,  3.9240e-01,  2.0805e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5152, 13.3232,  1.6131,  0.1394,  0.5081,  0.0259, -0.0413,  0.2395,
         0.1750,  0.2336,  0.0963, -0.1900,  0.1354,  0.1380, -0.1246,  0.1813,
        -0.0965, -0.0549, -0.1917,  0.0643,  0.1163,  0.1107,  0.5955,  0.1152,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([-9.5986e-01, -1.7042e+01,  7.9829e-01, -4.5403e-02, -2.5700e-01,
        -4.6171e-02, -6.1715e-01, -2.7953e-01, -2.8710e-01, -2.6995e-02,
         6.8940e-01,  7.0764e-02,  1.1238e-01, -1.0999e-01, -1.5608e-01,
         7.7048e-02,  7.9147e-02,  6.8462e-03,  1.9769e-01, -6.6185e-01,
         4.9052e-02,  3.4870e-01, -1.7911e-02, -6.8827e-02, -1.0400e-01,
        -5.0563e-01, -1.5780e-01, -2.0622e-01, -4.7041e-02,  2.5292e-01,
         6.0067e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.7485, -21.3194,   0.7702,  -0.5102,  -0.3846,   0.1736,   0.4731,
          0.1428,  -0.1978,  -0.2597,  -0.1002,   1.4559,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7705, 66.0589,  2.3809, -0.5252,  1.0359,  0.2405, -0.8328, -0.3998,
         0.1682, -0.1952, -1.5202, -2.5845,  1.5838,  0.2254, -0.0975,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1417e+00,  1.8657e+01,  3.4764e-01,  2.6225e-02,  9.8058e-01,
         3.4916e-01,  6.1596e-01, -9.2439e-02,  5.3309e-02, -2.5731e-03,
        -1.2466e-02,  2.5008e-01, -4.6683e-02, -7.2554e-02,  1.9579e-02,
         6.6633e-02,  3.1465e-02, -1.0581e-01, -1.0500e-02, -1.4965e-01,
         1.4451e-02,  6.5215e-03,  1.7048e-01, -9.4968e-02,  2.5012e-02,
        -1.0911e-01,  2.2068e-02, -5.6936e-02, -1.4465e-01,  5.5503e-02,
         3.9521e-02, -1.8425e-01,  2.4853e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9694e+00, -6.2100e+01,  2.3539e+00,  2.2380e+00, -5.5662e-01,
         5.0210e-01, -2.1790e-01,  1.7750e-01,  3.4811e-02, -1.5567e-01,
         5.6631e-01,  3.9668e-01,  9.7447e-01, -8.2978e-01, -1.1406e+00,
         2.0038e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4324, 43.3798, -1.1795,  0.9662,  0.8182, -0.7538, -0.9551, -1.4174,
         0.4182,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1857,  2.5312, -0.0715,  0.2712, -0.2199,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3571, 26.2236, -2.8650,  0.3787, -0.2639, -0.7426, -0.7763,  0.5729,
        -0.1289,  0.8981,  0.4895, -0.2380, -0.3986,  0.1103, -0.1344,  0.1036,
         0.3520, -0.0375,  0.0743,  0.0410,  0.1186, -0.1186,  0.1310,  0.0875,
         0.2214, -0.5967, -0.4930,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9940, 58.5797,  3.8487,  1.5135,  4.8451, -0.7141,  1.4808,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0620, 36.2279,  1.9321,  0.4577, -1.0428,  0.5315,  0.4799, -0.0704,
        -0.4830, -0.1388,  0.3668, -0.1033, -0.2338,  0.2105,  0.2083,  0.2884,
         0.1214, -0.6125, -0.7985, -0.6581,  0.6796,  2.0962,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5434e-02,  6.8436e+01, -2.2412e+00,  7.3825e-01,  1.3400e+00,
         7.7692e-01, -5.0430e-01,  1.2527e+00, -3.9961e-01,  1.1697e-01,
         9.2971e-01,  3.7681e-01, -2.0057e-02,  1.2466e+00, -7.9870e-02,
         4.8108e-01, -1.3751e-01, -1.4488e-01, -1.9851e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5998e-01,  3.7173e+01, -1.2079e+00, -5.2706e-01, -6.7359e-01,
         1.5211e-01,  2.7241e-01,  4.3476e-01,  1.1973e-01,  5.0115e-01,
         5.4701e-02,  2.5747e-01,  2.1942e-01,  2.1188e-01,  7.3624e-02,
        -3.1263e-02,  3.5730e-01, -7.0015e-02,  1.1305e-01,  2.9675e-01,
         3.4190e-01,  4.2314e-01,  2.6544e-01, -9.3503e-02,  5.4225e-01,
         9.9226e-02, -7.4775e-02,  3.8578e-01,  7.1621e-01,  6.7022e-01,
         7.7911e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.5900e+00, -1.2087e+01,  5.9161e-02,  1.5855e-01, -3.7022e-01,
        -1.8942e-01,  2.1695e-01,  6.5379e-02,  2.8423e-01, -3.2867e-02,
        -6.8830e-02, -1.0852e-01, -1.4174e-01, -4.0414e-01,  1.5355e-01,
        -1.1868e-01,  1.5308e-01, -3.5706e-02,  5.6963e-02, -8.1943e-03,
        -4.0745e-02, -1.6379e-02, -1.1757e-01, -4.2438e-02,  1.4208e-02,
        -6.2980e-02, -2.2681e-02,  8.8804e-02,  7.2763e-02, -1.2597e-01,
         1.0590e-01, -2.5730e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1628e-01, -2.0075e+01,  8.7358e-01,  1.0863e-01,  2.5908e-01,
         2.6015e-01,  5.5719e-01, -5.2973e-01, -2.2978e-01,  1.7316e-02,
         1.2228e+00, -2.3320e-01,  1.8960e-02,  2.8235e-01, -5.6504e-02,
         3.4525e-01, -2.5198e-01,  8.8506e-02, -6.7951e-02,  1.5975e-01,
        -5.9217e-02, -1.8764e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9206, 22.1705,  0.7522,  0.0425,  0.0685,  0.1503, -0.1944,  0.0378,
        -0.1244, -0.2398, -0.1613,  0.0355, -0.0651,  0.0414, -0.1789, -0.1235,
        -0.0298,  0.2399, -0.0223,  0.2482,  0.1775, -0.2286,  0.0549, -0.1441,
        -0.1063,  0.0986, -0.0577, -0.7048,  0.6675,  0.0849,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5188e-01,  7.7490e+00, -5.3834e-01, -3.1047e-01,  1.0062e-02,
         1.5563e-01,  6.2780e-02, -4.0764e-02,  3.9762e-02, -3.8480e-02,
        -1.1978e-01,  3.1811e-02, -9.9975e-02,  3.7823e-02, -3.8383e-03,
         5.9274e-02,  2.2080e-01, -7.0377e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2138e+00, -2.6759e+01, -1.4479e+00, -3.9572e-01,  2.5451e-01,
         3.9427e-01,  2.4599e-02, -8.7488e-02,  1.1176e-01,  2.0911e-03,
         4.9132e-02, -1.3435e-01, -5.0776e-02, -5.0634e-02,  5.0955e-02,
         4.6864e-01, -4.9450e-01, -6.7210e-01,  1.6021e-01, -1.1178e-01,
         4.0055e-02,  1.8484e-01, -2.6958e-02, -4.2945e-02, -1.1629e-01,
         5.5891e-02,  3.6708e-02,  1.3011e-02, -4.0048e-02, -1.3845e-01,
        -1.2992e-02, -1.2434e-01, -2.7568e-02,  7.4269e-02,  3.1193e-02,
        -8.4556e-01, -7.5157e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8243e-01,  8.1765e+00, -1.2426e+00, -9.1819e-02, -3.5351e-02,
         8.7387e-02, -4.8856e-02,  8.2037e-02,  3.3857e-02,  4.8011e-02,
         3.3171e-02, -9.7330e-02,  5.1785e-02,  1.0600e-01,  4.3465e-02,
         5.8025e-02,  5.0213e-03,  7.6849e-02,  5.1440e-02, -2.4112e-02,
         3.0799e-01, -8.7149e-02,  1.2100e-01,  1.5073e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5120e+00,  2.8387e+01,  5.5548e-01,  7.8728e-01,  8.7434e-01,
         1.9509e-01, -4.4888e-01,  4.2138e-01,  9.5284e-02, -3.7271e-02,
         7.0480e-02, -1.0809e-01, -3.8134e-01, -7.0964e-03,  2.4993e-02,
        -3.6868e-02,  2.8663e-02, -4.4945e-02,  5.2216e-02, -2.5892e-02,
         1.8739e-02, -9.8224e-02,  4.9569e-02, -1.9750e-02,  1.9050e-01,
         3.3755e-01, -1.8267e-02,  6.7743e-02, -5.1588e-02,  9.9078e-02,
         4.4152e-01, -1.6453e-02,  1.9004e-01,  2.5602e-01,  1.6563e-01,
        -3.4061e-02, -8.2840e-02, -2.6499e-02,  4.7994e-02,  8.7786e-03,
        -6.6223e-02,  9.0996e-02, -1.9320e-01,  3.9744e-02, -8.1829e-02,
         2.6607e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5877e+00,  5.1312e+01,  3.9389e-01,  9.9758e-01,  1.5208e+00,
         8.0203e-01, -8.0980e-01, -4.8220e-01, -7.5374e-02, -9.0929e-01,
         8.7011e-03, -3.8182e-01,  8.4636e-01, -3.4458e-01, -4.2671e-01,
        -4.0028e-01, -5.3734e-01,  2.5411e-01, -1.7763e-01,  4.7438e-01,
         1.3144e+00,  1.1680e+00,  2.0486e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9687e+00,  5.1973e+01, -2.1832e+00, -8.6156e-01,  1.1142e+00,
         5.9822e-01, -3.9489e-02,  3.7983e-01,  7.2666e-01, -6.9150e-01,
        -6.6917e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6584e+00,  5.8887e+01,  1.8367e+00,  1.6040e+00,  1.2537e+00,
         1.2953e+00,  1.4798e-01,  7.1652e-01, -2.8017e-01,  3.0915e-02,
         3.7850e-01,  6.6532e-01,  3.2763e+00,  4.8469e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0517, 44.5880,  0.6092, -0.2138, -0.2229,  0.5030, -0.2681, -0.7066,
        -0.3944, -0.5719,  0.7910, -2.6100,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7125e+00, -1.0542e+02, -5.9577e+00, -3.4199e+00, -4.8885e-01,
        -9.9297e-01,  1.4405e+00,  6.1414e-01,  9.4653e-02, -3.9871e-01,
         8.6950e-01, -1.2313e-01,  4.9515e-01, -5.6986e-01,  3.9036e-01,
         3.2765e-01, -7.8398e-02,  6.1419e-01, -1.2953e-01,  1.8028e+00,
         1.5742e-01, -3.3947e-01, -5.7547e-01,  4.9972e-01, -7.4241e-01,
         2.3470e-01, -1.3044e+00,  6.2406e-02,  8.1108e-01,  2.1151e-01,
         2.1200e-01, -5.7120e-01,  1.9025e-01, -1.9263e-01,  1.8126e-01,
         2.8552e-01, -1.8670e-02,  4.5877e-01,  5.3298e-01,  3.7327e-01,
        -3.0094e-01, -1.2403e-01,  7.4910e-02,  2.3337e-01, -3.9264e-01,
         3.5799e-01, -4.0919e-01,  2.9104e-01,  2.4425e-01,  6.6169e-01,
        -5.1227e-02, -5.2446e-02,  3.7105e-01,  4.9877e-01,  1.7258e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.1282e+00, -1.7998e+01, -3.0712e-01, -2.5777e-01,  1.3292e-01,
         3.3049e-02,  2.6184e-01, -9.5401e-02,  8.8831e-02,  3.7152e-02,
        -1.0208e-02,  6.0212e-02, -7.7094e-02, -3.1140e-01,  1.0298e-01,
        -5.8013e-02,  1.2736e-01, -7.8589e-01, -9.2932e-02, -1.2627e-01,
        -4.1076e-02,  2.7699e-01,  7.7082e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1445, 32.3470,  3.5661,  0.8996,  0.5308, -0.0719,  0.3114,  0.1225,
         0.9791, -1.3435, -0.0641,  0.6605, -0.9990,  0.0681,  0.0467, -0.1483,
         0.9331,  0.1022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1195e-02, -1.2484e+01, -1.5215e+00, -1.0259e-01, -2.3581e-01,
        -2.4514e-03,  2.1460e-01,  1.1864e-01,  3.5886e-01,  2.6450e-01,
         2.1811e-01,  2.1257e-01,  8.4565e-02,  1.8198e-01,  3.1283e-01,
         3.0513e-02, -2.1297e-01, -2.0889e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6348e-01, -4.0109e+01, -4.3350e-01,  5.8008e-01,  3.6069e-01,
        -3.5535e-01, -2.1268e-01, -1.1988e-01, -4.3287e-01,  2.0081e-01,
         6.1215e-01,  2.7427e-01, -6.5122e-01, -2.6064e-01, -2.0641e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1730e+00,  3.5667e+01,  9.2525e-01, -9.4899e-01,  2.5740e-02,
         1.2815e-02, -6.2663e-02, -3.5655e-01,  8.2088e-01,  7.3231e-01,
         4.6818e-01,  5.6167e-01, -5.8924e-03, -4.8910e-01, -1.0072e+00,
        -5.3373e-01,  5.9498e-01, -4.6202e-01, -8.3570e-02, -2.2230e-01,
         9.5219e-01, -2.1688e-01, -9.8810e-02,  9.2575e-02, -8.8376e-01,
        -2.9660e-01,  6.0310e-01, -2.2816e-01,  8.3199e-01,  1.0353e+00,
        -1.4723e-01, -4.9618e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3256e+00, -4.6201e+01, -2.2864e+00, -1.3192e+00,  7.6807e-01,
        -7.0000e-01, -3.8163e-01,  5.1210e-01,  4.9364e-01,  4.1652e-01,
        -6.8189e-02,  6.3026e-01, -5.1386e-01,  6.3801e-01,  1.0922e-01,
         2.5033e-01,  2.2670e-01,  8.4855e-01, -2.4744e-01, -6.6998e-01,
         4.7289e-01,  2.2170e-01,  9.4584e-01,  3.5630e-01, -6.1405e-01,
         8.7383e-02,  3.8061e-02, -5.2262e-01,  2.1043e-01,  4.1185e-01,
        -5.6598e-01,  1.2375e-01, -2.0184e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4771e-02,  4.4227e+01,  1.8946e-01,  2.3291e-01,  1.0285e+00,
         4.1407e-01, -4.2641e-01, -6.3756e-01, -2.2081e-01,  3.4995e-01,
        -1.0184e-01, -5.3639e-02, -1.3490e-01, -4.4450e-01, -2.0211e-01,
         1.7665e-03,  2.0290e-01, -1.5575e-01, -3.2302e-01,  1.7731e-01,
        -1.5280e-01,  2.0194e-01, -7.1154e-02, -1.1876e-01, -7.1080e-02,
        -4.3226e-02,  2.0638e-01, -1.5621e-01,  8.6512e-02, -4.0961e-01,
         1.0726e-01,  2.4499e-01,  2.3931e-01,  2.6694e-01,  2.0785e-01,
        -1.4838e-01, -6.3212e-02,  1.6564e-02,  1.4214e-01,  9.3708e-02,
         9.0362e-02,  4.3007e-02,  5.1290e-02,  1.3693e-01,  9.9844e-02,
         2.1360e-01, -1.7303e-01, -1.2251e-02,  6.4931e-02,  4.8287e-01,
        -1.4279e-01,  1.7591e-01,  2.4691e-01,  8.6718e-02,  1.5910e-01,
         2.3814e-01,  2.2563e-01,  5.5373e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6064e+00,  2.4147e+01,  2.0787e-01, -9.8959e-01,  1.0711e-01,
        -4.8753e-02,  3.6014e-02, -1.5951e-01, -1.6995e-01,  4.9406e-01,
        -1.8167e-01,  1.5299e-01, -5.4983e-01, -2.9812e-01, -1.2552e-01,
        -7.4213e-02,  1.8813e-01,  7.3672e-03, -2.9363e-03,  1.4695e-01,
         6.1852e-02,  7.3992e-02, -2.4612e-02,  4.0910e-01, -3.5743e-01,
        -1.6118e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5263e+00, -7.3265e+01,  3.4125e+00,  2.1179e+00, -3.4940e-01,
        -1.1973e-01,  1.1007e+00,  6.3420e-02, -5.0362e-02,  4.7881e-01,
         7.2700e-01,  2.2961e-01,  8.9794e-01, -4.1346e-01,  9.5369e-01,
         4.8895e-01, -4.3999e-01, -1.1145e-01, -1.0412e+00,  9.0232e-01,
        -1.4099e-01,  1.3678e+00, -3.9800e-01, -1.5641e+00,  6.7685e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9747e-01,  2.9801e+00,  1.8705e-01, -3.9842e-02,  7.6339e-02,
        -2.9735e-02,  6.6017e-03,  1.8546e-02, -1.5098e-02, -3.7643e-02,
        -7.2629e-03, -3.5179e-02, -3.9971e-02, -3.9999e-02,  8.3185e-03,
         4.6474e-02, -2.0416e-02,  3.9942e-02, -2.3876e-03, -2.1421e-03,
        -3.3336e-02,  1.2638e-02, -2.2814e-02, -2.2889e-02,  1.6376e-02,
        -2.4899e-02,  1.7542e-03, -1.2044e-02, -7.8822e-03, -7.9672e-03,
         1.5605e-03, -3.2281e-03,  7.1048e-03,  1.2431e-02, -2.9169e-02,
        -6.3531e-04, -3.3023e-03, -1.6681e-02, -3.5851e-03,  8.1234e-03,
        -2.7557e-02, -1.0077e-02,  3.6966e-02,  4.2706e-02,  4.8786e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3542, 11.9402,  0.5944,  0.0866, -0.0342,  0.0481,  0.0773,  0.0997,
        -0.1544,  0.1211, -0.1229, -0.1677, -0.1274,  0.1178,  0.0579, -0.0962,
        -0.2994, -0.0700, -0.0646,  0.0329,  0.1857,  0.0907,  0.3739,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1104,  7.7821, -0.5678, -0.3413, -0.0662,  0.1011, -0.2706, -0.1162,
         0.2304,  0.2198, -0.0195,  0.0220,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-5.1903e-01, -6.3745e+01, -5.0151e+00,  9.4947e-01,  6.1302e-02,
        -2.8734e+00, -5.6862e-01,  1.5911e+00, -1.5988e+00,  1.6075e+00,
         2.3637e+00,  6.5434e-01, -2.6088e-01,  4.7568e-01, -6.7920e-01,
         4.7584e-01,  2.0879e-01,  9.2974e-01,  1.8105e-01,  6.4035e-01,
         5.6113e-01,  1.1873e+00, -3.4480e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8549e+00, -8.3584e+01,  1.0971e+00,  7.2246e-01,  1.4351e+00,
        -2.8991e-01,  1.2932e+00, -9.0286e-01,  1.3166e-01, -6.3851e-01,
        -2.2175e-02,  6.3507e-01, -5.6471e-01, -1.5938e-01,  6.1656e-01,
         1.2207e-01,  2.1757e-01,  2.7646e+00, -2.2979e-01,  2.1911e-01,
         3.3531e-01,  3.2154e-02,  1.1161e+00,  5.4309e-01,  7.7409e-01,
        -5.8405e-02,  1.1365e-01, -1.6688e-01, -1.4376e-01, -1.0212e-01,
         3.8377e-01,  1.7084e+00, -9.3390e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9814e-02, -8.5289e+01, -2.1941e+00,  1.6395e+00, -2.1106e+00,
        -3.5696e-02,  8.3894e-01,  9.2430e-01, -1.0347e+00, -5.4798e-01,
         2.0506e-01, -5.0665e-01, -9.0413e-01, -3.4480e-01, -1.0483e+00,
         5.2849e-01,  6.6301e-02, -2.9138e+00,  1.0535e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.3768, -19.5707,   0.2564,  -0.1257,   1.2229,   0.0711,   0.1429,
          0.1901,   0.1272,   0.1827,  -0.1524,  -0.0759,   0.0265,   0.1385,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4616e-01, -4.7655e+01, -1.6121e+00,  1.2528e+00,  1.2020e+00,
         1.9427e-01, -4.0401e-01,  2.5524e-01, -2.8098e-01, -2.5504e-01,
        -2.6360e-01, -3.4621e-01,  8.3731e-02,  5.2574e-02, -2.1157e-04,
         1.7717e-01,  1.1750e-01, -3.0504e-01, -3.7023e-01, -6.6896e-01,
        -2.4667e-01,  4.2282e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5386e-01,  3.8895e+01,  3.3017e-01, -5.0917e-01, -7.6227e-03,
        -2.4758e-01, -1.2591e-01,  1.5034e-01, -6.1082e-02, -2.0103e-01,
        -1.9541e-01, -6.9518e-01, -8.6569e-01, -3.7549e-01,  5.9361e-02,
         1.0512e-01,  1.9021e+00,  3.9055e-02, -1.2858e-01,  3.1529e-01,
         1.9164e-01, -5.6293e-02,  4.6313e-02, -3.8346e-01, -2.8889e-01,
         1.8300e-01, -1.1146e-01, -3.4747e-01,  2.4236e-02, -5.3293e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4720e+00,  1.1699e+01, -8.6539e-01, -7.9808e-01,  2.0962e-02,
        -3.0952e-03,  2.5921e-01, -3.7219e-02, -8.8721e-02, -2.9707e-02,
         6.9436e-03, -6.3792e-01, -6.8877e-02,  2.2703e-01,  8.8330e-02,
        -1.1340e+00,  5.8103e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.4899, -66.2131,   2.1798,   1.7098,   1.0400,  -2.3399,   1.6501,
          0.2627,  -2.6829,   0.3526,   0.4549,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.2185, -15.3541,  -0.8223,  -0.0746,  -0.5156,   0.5144,  -0.1463,
          0.2858,   0.2069,  -0.1886,  -0.0735,   0.1876,  -0.2126,  -0.3390,
          0.0371,  -0.1921,   0.5392,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1530e+00,  5.5414e+01, -1.7644e+00,  2.6907e+00,  2.0794e-01,
         2.9482e-01,  5.9534e-01,  2.6274e-01, -5.1637e-01,  4.8844e-01,
        -1.4663e-01,  3.6930e-01, -8.3629e-01, -6.8753e-02,  2.2424e+00,
         3.2378e-01,  9.1612e-02, -5.1065e-01, -3.5042e-01,  3.2768e-01,
         3.4271e-01,  5.1348e-02,  7.6284e-01,  3.0416e-01,  4.5279e-01,
        -8.4621e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4879e+00,  2.7796e+01, -1.8606e+00,  1.7082e-01,  2.2897e-01,
         1.9256e-01, -2.5271e-02, -4.4130e-02,  5.4339e-02, -1.7777e-02,
        -2.9308e-01, -3.5191e-01,  3.4640e-02, -3.2972e-01,  3.4668e-01,
        -7.8759e-02, -2.4993e-02,  2.0006e-01, -8.3267e-02, -1.5624e-01,
         1.2558e-01, -2.5510e-02, -6.2704e-02, -4.5720e-02,  4.1011e-01,
         8.2498e-02,  2.1157e-01, -8.4512e-02,  1.5768e-01,  1.0242e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1159e+00, -7.3440e+01,  6.3607e-01,  2.3051e+00, -1.0496e-02,
         2.1484e+00,  2.0084e+00,  2.3040e+00,  1.5692e-01,  1.5425e+00,
         6.3091e-01, -1.2797e-04, -4.3112e-01,  2.9878e-01,  9.1058e-01,
        -6.0791e-01,  1.5032e+00, -1.3761e+00,  1.5377e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.9035e+00,  2.3063e+01,  6.4790e-01,  7.0912e-01,  5.7978e-01,
         1.8410e-01,  1.0375e+00, -1.1290e-01, -5.2753e-01, -7.1841e-02,
         2.6278e-01,  4.1977e-02, -1.8252e-01,  3.2958e-01, -6.7827e-01,
         1.8664e-01,  3.4602e-01, -1.7755e-01,  1.6369e-02,  1.9966e-01,
        -7.1831e-02,  1.0860e-01, -9.9055e-02,  3.7555e-01, -7.4135e-01,
         5.2067e-01,  1.4579e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.9499, -80.3649,  -0.2978,   4.7224,  -1.5961,  -2.5455,   0.8411,
         -0.2315,  -1.0468,  -0.3309,  -0.2269,  -0.7945,   2.7166,   0.5319,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5617e-03,  1.0942e+02,  4.9586e+00, -2.5546e+00, -1.3759e-01,
         7.0311e+00,  1.2864e+00,  1.0282e+00,  2.3175e+00,  3.8095e-01,
        -1.2347e+00, -5.6099e-01,  6.5051e-01,  2.0006e-01, -6.3740e-02,
        -4.1055e-01,  7.1451e-01, -7.9672e-01,  8.8309e-01,  7.4321e-01,
        -1.3237e+00, -4.4635e-01, -2.9859e-01, -1.2423e+00, -4.3294e+00,
        -9.3691e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9667, 43.2618, -0.6646,  0.9369,  0.8706, -1.3030,  0.3509,  0.8453,
         0.1229,  1.6952,  0.4586,  0.4094,  1.2220, -2.1099,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2563e-01,  2.8561e+01, -3.3013e-02,  2.7913e-02, -2.4395e-01,
        -1.1672e-02, -4.0776e-01, -5.7082e-01, -4.2549e-02,  1.3296e-01,
        -1.1562e-01, -1.2801e-01,  1.8804e-02, -4.5036e-01, -1.0282e-01,
         5.3527e-02, -5.5819e-01, -1.8542e-01,  2.1514e-03, -1.2500e-01,
        -2.1498e-01, -8.3094e-02, -1.6651e-01, -2.0692e-02,  1.9658e-02,
        -1.3312e-01, -8.2555e-03, -4.2570e-01, -2.4708e-01,  1.7494e-01,
        -3.1872e-01,  5.9481e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5258e+00,  3.9878e+01,  2.0724e+00,  2.9399e-01, -3.6573e-01,
         7.8052e-01, -2.4135e-01, -3.7666e-01,  2.0808e-01, -3.0413e-01,
         7.0314e-01, -1.5651e-01, -1.0438e-01,  1.0665e-01, -2.3594e-01,
         6.3858e-02,  1.2838e-01, -6.9605e-01,  2.6060e-01,  2.2702e-01,
         6.6031e-02, -2.5593e-02,  2.4830e-01,  1.1797e+00, -4.8829e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6591, 31.2839,  3.3957,  2.2496, -0.3920,  0.4302,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6451e+00,  5.8623e+01,  1.4767e+00, -9.3771e-01,  8.0700e-01,
        -6.1963e-01, -4.1114e-01, -1.9985e-01, -7.3666e-02, -3.7687e-01,
        -2.4945e-01,  7.4219e-01, -6.8702e-01, -5.6240e-01, -7.1297e-01,
         4.3690e-01,  6.6696e-01,  1.6116e-01, -7.8993e-02, -3.2299e-01,
        -3.3863e-01, -8.2068e-02, -1.0219e+00,  2.4311e-01, -8.2574e-02,
         2.4009e-01, -1.7484e-01, -4.2369e-01, -1.7671e-01,  2.4586e-01,
         2.8289e-01,  9.1717e-02,  5.9222e-01,  5.4080e-02, -1.5863e-01,
        -2.6920e+00, -7.7122e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1890e+00, -1.1566e+01, -8.6555e-02, -3.6600e-01, -1.1244e-02,
        -5.4976e-02, -1.1019e-01,  4.7975e-02,  1.0202e-02,  4.2315e-02,
        -4.2204e-02, -6.4256e-02,  3.0877e-02,  4.3001e-02, -1.4317e-01,
         4.5118e-02, -5.6196e-03, -8.9117e-02,  1.9122e-03, -2.2643e-02,
         3.3110e-01, -2.5220e-02, -6.8728e-02, -1.2813e-01, -4.2473e-02,
         1.2033e-01,  8.5772e-02, -5.1969e-02,  2.3327e-02,  2.1325e-02,
        -1.5325e-02, -4.5748e-02, -3.1498e-02, -4.2093e-01,  1.9608e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.4310,  57.4900, -11.5246,   2.4948,   0.3159,   0.1505,   0.4675,
         -0.4249,  -0.4145,   1.4458,   0.9484,  -2.5196,  -2.6634,  -1.1945,
          1.9064,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8670e-01, -2.6537e+01, -1.2073e-01, -1.6735e-01, -2.6505e-01,
        -1.4845e-01, -6.2860e-01,  3.0550e-01,  2.7156e-01,  6.2377e-02,
        -5.0382e-03,  1.0234e-01,  7.0372e-02, -7.6488e-02, -2.3606e-03,
        -1.7406e-01, -2.8063e-02, -4.8711e-02,  1.5893e-01,  1.2187e-01,
        -1.0319e-01, -2.9772e-01,  7.4455e-02, -1.0866e-01,  6.1127e-02,
        -1.4294e-01,  2.1578e-03, -1.3417e-01,  1.3420e-01, -1.2433e-02,
        -1.6082e-01,  3.8897e-01, -1.0075e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4036e+00, -9.8656e+01,  4.2227e+00,  1.3254e+00,  5.7254e-01,
        -1.2099e+00,  1.2858e+00,  9.8144e-01, -2.2525e+00,  8.2187e-02,
         1.2605e+00,  1.2407e-01, -3.6477e-01,  3.9669e-01,  4.6587e-01,
         1.3357e+00,  2.2177e+00,  1.7500e+00,  1.4046e+00,  4.4518e-01,
         2.2783e+00,  7.5846e-01,  1.4769e+00, -5.1747e-02, -7.0397e-01,
        -1.2040e+00,  3.1104e+00,  6.7542e+00,  1.8746e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.7345e+00,  3.7980e+01, -8.2540e-02, -1.0797e+00, -1.2929e+00,
        -3.6942e-01, -6.8177e-01, -1.3129e-01, -5.4047e-01, -1.3805e+00,
         3.1600e-02, -1.5663e-01, -6.9867e-02, -1.9887e-02, -1.9024e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6151e-01,  2.1329e+01, -5.9181e-01, -8.4679e-01, -1.3394e-01,
         1.9145e-01, -1.4526e-01, -1.0836e-01,  1.2751e-01, -9.9150e-04,
        -1.5413e-01, -6.8047e-02, -1.4143e-01,  8.6682e-02,  1.4453e-01,
        -4.6049e-02, -8.9616e-02,  1.6781e-02,  4.6707e-02, -4.5669e-02,
        -8.1393e-02,  1.0796e-02, -1.5785e-01,  5.5787e-02,  1.0781e-01,
         9.4305e-02,  2.1527e-02,  1.1037e-01, -7.6638e-02,  1.0149e-02,
         9.4432e-03, -8.4485e-02, -1.0182e-01, -1.0664e-02, -1.4247e-01,
        -1.1574e-01, -3.9020e-02,  6.0031e-02, -1.0252e-01, -1.5276e-01,
        -1.3344e-01,  5.2098e-02,  1.6030e-02, -2.9561e-02,  1.1601e-01,
         5.2609e-02,  1.5945e-02,  3.2522e-02,  6.3435e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6783e+00,  2.4699e+01,  2.3503e-01, -1.8405e-01,  3.4785e-01,
        -7.9779e-02,  3.8148e-02,  1.2128e-01,  4.3633e-01, -8.2739e-02,
        -3.5598e-02,  2.1304e-02,  8.0452e-02,  2.1694e-01, -6.5415e-02,
        -1.2626e-01,  1.1177e-01,  1.7107e-01,  1.3736e-01,  9.3292e-03,
        -2.4700e-02,  2.1863e-01, -1.3124e-01, -2.1960e-02, -2.7556e-02,
        -5.2447e-02, -3.6160e-02,  3.4590e-01,  1.7236e-01, -1.0699e-01,
        -5.4449e-02,  6.6450e-02, -9.6455e-02,  4.3271e-01,  4.8259e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0722e+00, -1.8296e+01, -3.1626e-01, -3.0829e-01, -1.2690e-01,
        -2.9467e-01,  1.4388e-01, -6.1072e-02, -1.6641e-01, -1.2696e-02,
         1.8398e-01,  7.8658e-02,  1.2820e-01, -1.3954e-01, -6.4064e-02,
         4.0672e-04,  1.6038e-02, -5.4875e-02,  1.9027e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4316e+00,  1.0278e+02, -4.7758e-01,  4.3384e+00,  1.0073e+00,
         1.3108e+00,  1.0483e+00,  1.2525e+00,  1.6367e+00,  1.5404e-01,
        -8.3691e-01,  2.9592e-01,  2.2070e+00,  7.4426e-01, -2.1200e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6666e+00,  2.3866e+01,  7.6530e-01,  6.2436e-02, -6.8105e-01,
        -6.4226e-01, -5.1065e-01, -6.6828e-02, -2.7081e-01, -1.0662e-01,
         3.7494e-02,  3.4743e-01, -2.4449e-01, -1.4041e-01,  2.2803e-01,
        -3.2119e-02, -1.8399e-01, -1.7820e-01, -1.8824e-01,  7.2310e-02,
        -1.8878e-01, -1.4263e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2501e-01,  5.1093e+01,  4.2691e-01, -4.7684e-01,  6.3612e-01,
        -4.1898e-01, -5.4171e-01, -1.0816e-01,  9.0368e-01,  2.1332e-01,
         7.5262e-02,  2.1237e-02, -2.5402e-01, -5.6199e-02, -3.4738e-01,
         2.6224e-01, -1.5196e-01, -3.0068e-02, -2.5782e-01,  4.4786e-01,
         2.7715e-01,  5.3383e-01, -6.3659e-01, -4.4660e-01,  9.0527e-01,
         2.2875e-01, -1.8048e-01, -2.9020e-01, -9.1788e-02,  3.7406e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5170, 23.1051,  0.5194, -1.0623,  2.3912, -0.9125, -0.2515,  0.7178,
         0.2245, -0.5041,  0.6299,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2529e-01, -2.2380e+01,  1.6617e-01,  7.0179e-01, -3.0561e-01,
         2.9235e-01,  1.8547e-01, -1.3506e-01,  3.7099e-02, -9.9445e-02,
         1.7871e-01, -2.6783e-01, -6.2877e-02, -1.8586e-01, -3.7138e-02,
         4.9724e-02, -1.7850e-01,  2.6977e-02, -4.9915e-02, -1.0479e-02,
         1.1161e+00,  1.7910e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0866e+00, -3.8046e+01, -1.5150e+00, -9.4680e-02, -1.7246e-01,
        -6.9197e-01, -1.3043e-01,  1.5136e-01,  2.4257e-01, -9.9303e-02,
         4.1884e-01, -3.8241e-01,  5.1894e-01,  1.8781e-01, -1.3265e-01,
         9.2282e-01, -1.4706e-01,  1.8653e-01, -2.9319e-01,  2.4653e-01,
        -2.0952e-04,  7.0166e-02, -1.2613e-01,  6.3587e-03,  5.7799e-02,
        -2.5905e-02, -5.5107e-02, -3.7170e-01, -3.1031e-01,  1.2205e-01,
        -4.0297e-01, -6.0200e-02, -1.6018e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0728e-01, -3.5942e+01, -2.4746e+00,  5.5296e-01,  4.7339e-01,
        -3.6963e-02,  2.6094e-01,  3.8793e-01,  1.5366e-01,  2.2657e-01,
        -9.1401e-02, -5.2138e-02,  3.6750e-01,  3.5155e-01,  8.0916e-01,
         1.9938e-02,  3.4625e-03,  3.5732e-02,  2.0963e-01,  7.2137e-02,
         5.7806e-02,  4.0860e-01, -3.4591e-01,  1.3677e-01, -5.4988e-01,
        -3.8823e-01,  7.2002e-01,  5.4467e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1609e-02, -4.7433e+00, -4.5289e-01, -5.2145e-02,  5.7989e-02,
        -1.4299e-02, -1.4625e-01,  2.4869e-03,  1.0955e-01,  4.8958e-02,
         3.4373e-02, -4.5817e-02,  1.1172e-01,  6.5327e-03, -2.7394e-02,
         2.1963e-02,  3.8958e-02,  4.1273e-02,  1.6551e-02, -2.2839e-02,
         1.6426e-02,  7.3969e-03, -2.8334e-02,  6.4300e-02,  1.9177e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.7966e+00, -5.1212e+01, -5.7540e-01,  2.6554e+00, -6.5467e-01,
        -1.1577e+00, -4.8963e-01, -3.3819e-01, -8.1498e-02, -1.9218e-01,
        -3.2241e-01, -7.5456e-02,  7.7905e-02, -2.8716e-02, -1.9997e-01,
         1.6403e+00,  1.7510e-01, -2.6330e-01, -1.4196e-01, -2.4836e-01,
        -3.5184e-03, -1.5248e-01,  3.4436e-01, -2.0715e-01, -3.6994e-01,
        -4.5581e-02, -1.2489e-01, -1.3178e-01, -7.1980e-02,  1.2336e-01,
        -7.3951e-02,  2.1013e-01,  4.5689e-02, -6.0683e-03, -3.4140e-01,
         2.1826e-01,  2.8059e-01,  3.7645e-01, -7.5535e-02, -1.4411e+00,
        -5.6690e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1120e+00,  6.1772e+01, -1.0635e+00, -1.5830e+00,  2.4110e-01,
         7.4476e-01, -5.1408e-01,  3.9460e-01, -1.4832e-01, -7.1297e-02,
        -1.9017e-01,  6.4482e-02,  5.8134e-01, -3.0609e-01,  9.3497e-01,
        -2.3649e-01, -1.7244e-01, -8.0806e-01, -1.6857e-01, -4.0278e-01,
        -1.0961e-02, -1.0217e-01,  7.6517e-02,  8.5516e-01,  4.3765e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9885e-01,  2.0574e+01,  8.5377e-01, -5.3501e-01,  2.6127e-01,
        -2.1988e-01, -6.1079e-01, -3.6848e-02, -2.6019e-01,  1.3211e-01,
        -1.8962e-01, -8.5280e-02, -7.4813e-03, -1.5964e-01, -3.0316e-01,
        -1.1242e-01,  2.9434e-01, -5.4439e-01, -4.1016e-01, -1.6483e-02,
        -1.5832e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2164e+00,  8.1700e+01, -2.6451e+00, -1.6633e+00, -1.5999e+00,
        -1.0539e+00,  8.2452e-01,  1.0369e+00,  1.1336e+00, -2.1970e-01,
         3.0412e-01, -5.8079e-01, -7.3830e-02, -1.4214e+00, -9.9314e-01,
        -9.6149e-01,  1.4079e+00,  7.2675e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5187e-01,  1.9143e+01, -7.8899e-02, -3.4588e-01,  1.7456e-01,
         2.7555e-01,  3.6175e-01,  5.8137e-01,  2.1122e-01,  7.0690e-02,
        -1.0454e-01, -2.1990e-01,  4.2892e-02, -3.5286e-02,  3.1572e-02,
         8.1315e-03,  1.0067e-01,  2.1849e-01,  1.9514e-01, -7.2338e-02,
         6.3469e-02, -1.8068e-01,  1.1650e-01, -7.1191e-02,  1.0947e-01,
         1.9178e-02,  1.6609e-01, -7.2233e-02,  8.1470e-02, -1.1278e-02,
         1.6085e-01, -3.9511e-02, -1.6045e-02, -6.3101e-02, -6.8138e-02,
         1.4342e-02,  4.9738e-01,  1.4230e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.6991, -21.0751,   0.2726,   0.1708,  -0.5906,  -0.2274,  -0.2399,
         -0.9194,  -0.3887,  -0.1164,   0.7485,   0.2211,   0.1524,   0.1811,
         -0.4617,   0.2289,  -0.1294,   0.7452,   0.2739,  -0.3431,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.4279, -22.9045,  -0.5307,  -0.3677,  -0.3602,  -0.1884,  -0.1171,
          0.1865,   0.0988,   0.1124,  -1.0079,   0.1580,  -0.5426,  -0.3668,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8418e-01,  1.1714e+01, -3.4066e-01, -2.0728e-01,  1.9577e-01,
        -2.4162e-02, -1.5626e-01, -3.4366e-02,  1.1827e-01, -1.1973e-01,
        -1.5580e-01, -1.7568e-02,  2.6826e-03, -2.0720e-02,  8.2919e-02,
         7.9284e-02,  3.7228e-01,  1.2040e-01,  1.0574e-01,  1.4252e-01,
         7.2329e-02,  2.2383e-01, -7.0245e-02, -2.1724e-01,  6.4724e-02,
        -2.6354e-01, -1.4496e-02,  7.9008e-02,  5.7769e-03,  2.7310e-01,
         1.8489e-01, -9.0668e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7042e-01, -1.1715e+02, -8.3788e+00, -7.5002e+00, -4.3410e+00,
        -9.8942e-01, -1.2958e+00, -7.2789e-01, -8.7703e-01,  1.0359e+00,
         3.6141e-02,  7.4001e-01, -5.2681e-01, -1.2948e+00,  5.4547e-01,
         4.0994e-01, -5.5600e-02,  1.3597e+00,  1.5600e+00,  3.0058e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5336e-02,  2.8774e+00, -7.2593e-02, -1.3353e-01, -1.0591e-03,
         4.6518e-02, -2.5322e-03, -4.8597e-02, -7.0491e-02, -8.2521e-03,
        -4.1164e-02,  1.7021e-02, -3.4632e-03,  1.9244e-02,  4.3993e-03,
         1.2997e-03, -5.5902e-03, -2.2214e-02, -2.1263e-02,  2.9831e-02,
        -1.3572e-02,  8.9357e-02, -1.5231e-02, -2.1689e-02,  1.1442e-02,
        -3.9095e-02, -1.7253e-02, -3.4842e-02, -2.2099e-02, -2.9403e-02,
        -5.5426e-04, -5.8958e-03,  6.8176e-02,  3.1013e-02,  2.8694e-02,
        -3.5651e-02,  1.6102e-05,  9.2106e-03,  1.1737e-02, -3.1627e-03,
         3.6448e-03,  2.4157e-02,  5.0863e-02,  1.1074e-02,  1.5296e-02,
         8.2716e-02, -4.4260e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0939e-01,  1.8088e+01,  3.3695e-01, -4.2191e-02,  9.1610e-01,
        -2.5685e-01,  2.1858e-01, -1.9695e-02, -3.2286e-01,  2.5199e-01,
        -3.9766e-01, -2.6495e-03,  9.3921e-02,  1.7954e-01, -7.2500e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3466e-01, -5.6077e+01, -1.2500e+00, -5.7879e-01, -1.8839e+00,
         1.1702e+00,  8.0630e-01, -2.5566e-01,  4.4762e-01, -5.8162e-01,
         2.1588e-01, -1.3517e-01,  1.4736e-01, -9.5331e-01,  1.4633e-01,
        -3.3249e-02, -9.5731e-01, -4.4975e-01, -3.6928e-01,  1.1576e+00,
         1.9990e-01,  7.6796e-01,  2.7695e-01, -1.4397e-01,  6.8878e-03,
        -6.7217e-02, -5.9771e-01, -2.7619e+00,  9.9478e-02, -4.1533e-01,
        -3.4661e-01,  2.6183e-01, -2.5803e-02, -7.1482e-02, -1.9109e-01,
        -3.7184e-01,  1.8631e-01,  1.5115e-01,  5.1157e-01, -8.2002e-02,
        -2.0935e-01,  4.6469e-02,  2.2268e-01,  8.0211e-02, -2.2707e-02,
         7.5305e-01, -6.9965e-02, -1.6930e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 6.6599e+00,  9.2828e+01,  3.7092e+00,  1.0794e-01,  1.6375e+00,
         9.7786e-02, -6.4942e-01,  2.6157e+00,  2.4172e-01, -1.8675e-01,
        -9.8274e-01,  1.6342e+00, -6.2479e-01, -6.0449e-02, -5.2352e-01,
        -4.9140e-01,  2.5042e-01, -7.8150e-01, -9.2931e-01, -1.2146e-01,
         1.1597e+00,  2.6226e-01,  1.7108e-01,  9.4143e-01,  2.8390e+00,
         1.2203e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9923e-01, -6.0583e+00, -4.7343e-02,  4.1657e-02, -1.0796e-03,
        -1.1056e-01,  2.7243e-01,  3.9288e-02, -2.2111e-01, -7.7310e-02,
        -1.8825e-01, -2.2338e-02, -2.7138e-02, -1.2222e-02,  1.5564e-02,
        -3.7520e-02, -3.1563e-03,  9.5869e-02, -1.0700e-01,  1.5713e-01,
        -1.8169e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4966e-01,  1.0050e+01,  5.5193e-01,  4.0703e-01, -1.2778e-01,
        -2.0991e-01, -1.4733e-01,  2.3654e-02, -1.0937e-01, -6.0159e-02,
        -9.9532e-02,  4.6239e-02, -4.1329e-02, -9.8383e-02, -7.9784e-02,
        -3.3280e-02, -1.5847e-02, -5.1511e-02, -4.9728e-02, -2.0265e-01,
        -1.0490e-01,  8.8617e-03, -1.2123e-02, -4.3112e-03,  4.5499e-02,
         2.4964e-02, -6.8912e-02,  4.6170e-02, -1.6808e-02,  2.1548e-02,
        -9.1491e-03,  8.7748e-03,  6.9250e-02,  3.1696e-02, -1.8173e-02,
        -2.3305e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3949e-01, -8.1557e+01, -9.5546e-01, -6.5472e-01, -9.2844e-01,
         4.6990e-01,  3.3331e+00, -7.7571e-01, -1.3628e+00, -1.4790e+00,
        -2.9081e+00, -6.0052e-01,  9.2684e-01,  7.1767e-01, -3.9302e-02,
        -8.9376e-01, -4.0250e-01,  2.8682e-02,  1.5500e+00,  9.7301e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6057, 55.1475,  0.9490, -0.8562,  2.0484, -0.1692, -0.3058, -0.4065,
        -0.1642, -1.0145,  0.8106, -0.3602, -0.6745,  0.2090,  0.4884, -0.0585,
        -0.7311,  0.2132, -1.1758, -1.7695, -1.1660,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0819e+00, -2.4773e+01,  1.1441e-01,  3.4742e-01,  3.8199e-01,
        -3.6265e-01, -2.7413e-01,  1.4703e-01, -5.5216e-01, -2.1661e-02,
         1.1837e-01, -8.2117e-01, -1.0776e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9267e+00,  3.2982e+01,  6.1490e-01, -7.2369e-01,  1.1729e-01,
        -2.5487e-01, -1.4142e+00, -2.0718e-02, -4.3118e-01, -5.7858e-01,
         2.0442e-01, -2.6933e-01, -1.3953e-02,  6.3918e-02,  4.9202e-01,
         3.5084e-01, -3.0448e-01, -3.2870e-01, -3.1942e-02,  6.4793e-02,
         2.4287e-01,  4.5918e-01,  2.4677e-03, -2.3348e-01, -7.4068e-02,
         3.2409e-02, -7.9566e-01,  5.7616e-01,  1.2610e-02, -1.0166e-01,
        -1.7026e-01, -4.3845e-03,  1.6308e-01, -3.1478e-01, -1.8602e-01,
        -9.4218e-01,  1.1687e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5378e+00, -1.1771e+02, -1.7934e+00, -1.6430e+00,  1.0145e+00,
        -1.0653e-01, -1.6292e+00, -4.7455e-01,  9.2711e-01,  2.3345e+00,
        -3.1015e-01, -3.3989e-01, -1.9485e+00, -6.0953e-01, -1.2786e-01,
        -1.3493e+00, -8.2401e-01, -6.5672e-01,  9.4727e-01, -1.3884e+00,
        -6.7578e-01, -1.4143e+00,  2.1239e-01,  1.3808e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8020e+00, -5.1081e+01,  2.6848e-01,  1.4351e+00, -1.8806e+00,
         4.1783e-01,  3.1296e-01, -8.4244e-01, -7.2006e-01, -4.4823e-01,
         1.7881e+00,  3.6039e-01, -9.5523e-01, -1.4344e-01,  6.8073e-01,
         1.9322e+00, -7.4026e-01, -6.1532e-01, -2.0514e-01, -2.9359e-01,
        -1.4180e-01,  4.5138e-01,  7.9539e-02, -2.6545e-01,  4.7264e-02,
         4.3080e-01,  2.5106e-02,  4.8623e-01,  7.4337e-02, -2.6009e-01,
         1.2664e+00,  3.2768e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9875e+00, -9.2457e+01, -6.4925e+00,  2.0711e+00, -2.3333e-01,
         2.5471e+00,  2.8113e-01,  1.8231e+00, -2.1509e+00, -4.6416e-01,
        -7.4760e-02,  1.1939e-02,  2.3733e-01, -1.6424e-01, -2.0584e-02,
        -6.0086e-01, -5.8224e-01, -1.2029e-01, -3.7236e-01, -9.0548e-02,
         5.3425e-01,  2.1544e-01, -5.9795e-03,  8.8531e-01, -2.2385e-01,
        -1.5912e+00,  5.7872e-01,  3.8482e-01, -2.3508e-01,  1.2408e+00,
         5.5500e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9186e-01,  7.1179e+00,  4.4474e-02, -2.4877e-01, -4.9104e-03,
        -7.5553e-02,  6.0010e-02, -1.6059e-01, -5.8214e-02, -4.2908e-02,
         3.1877e-01,  1.7663e-04, -7.7760e-02, -8.5099e-02,  4.5666e-02,
         4.8277e-02,  5.4620e-02, -5.0108e-02,  2.6684e-02,  3.4593e-02,
         2.5491e-01,  1.0041e-01,  6.4551e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8426e+00,  5.4028e+01,  1.0772e+00,  3.0910e-01,  1.9145e+00,
         9.7567e-01,  2.3063e-01,  3.2243e-01,  2.2008e-01,  2.3466e-01,
        -2.5815e-02, -1.9870e+00, -7.1836e-01, -5.6805e-01, -1.8465e-01,
        -2.6033e-02, -7.1684e-01, -1.7383e-02, -1.4505e-01,  1.3420e-01,
         6.0724e-02,  3.3212e-01,  2.6919e-01,  3.7793e-01,  3.2909e-01,
         6.9766e-01,  9.6874e-01,  6.3448e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.4650e+00, -7.1701e+01,  1.2809e+00, -1.2336e+00, -1.0936e+00,
        -3.9758e-01, -1.4326e+00,  7.4967e-01,  6.2729e-02,  5.4659e-01,
        -3.9358e-01, -1.4835e-01, -6.7994e-01, -1.0517e-01, -5.1973e-01,
        -2.2180e-01, -9.1372e-02, -9.2071e-02,  2.3610e-01,  1.1721e+00,
        -7.6379e-01, -4.3051e-01, -3.8909e-01, -7.8084e-02, -2.9074e-01,
        -2.0125e-01, -1.4212e+00, -1.1811e+00, -2.2960e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3836e-01,  5.4313e+01, -6.4674e-01,  4.7686e-01,  4.4461e-01,
         8.8353e-02, -2.8905e-01, -1.2780e-01,  4.9575e-01,  3.9165e-01,
        -3.3999e-01, -8.1321e-02,  1.3642e-01, -5.8428e-01,  6.6488e-01,
        -1.2159e-01,  4.4934e-02,  9.2769e-01, -1.5997e-01, -6.7784e-01,
        -4.4043e-01, -3.2650e-01,  2.6238e-02,  1.8606e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.8393, -78.5120,   2.5062,   6.1014,  -2.2314,   1.0290,  -0.1187,
          1.4452,  -0.3154,   1.1474,   2.0617,  -0.9995,   1.5680,   0.9085,
          0.7074,   0.5041,   0.7180,  -0.0955,   1.4356,   0.5060,  -0.2841,
         -0.5657,   0.3232,  -0.2434,  -0.1816,   0.2919,   0.3119,   0.1235,
         -0.4020,  -0.1173,   0.9329,   1.2301,   0.3653,   0.5834],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6463e+00,  1.1913e+02, -3.7292e+00, -1.0893e+00, -2.0239e-01,
         2.0992e-02,  1.4397e+00, -9.6296e-02,  1.2508e+00, -5.9676e-02,
         1.8231e-01, -1.2679e+00,  1.4605e+00, -1.1476e+00,  3.8031e-01,
        -6.1889e-01, -3.4830e-01,  3.6595e-01, -6.1729e-01,  1.3036e+00,
        -1.4982e+00, -2.6067e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9638e-01, -5.6481e+00,  8.8232e-02,  4.9013e-02, -3.4080e-01,
        -1.1247e-01, -1.7315e-02,  5.1004e-02,  9.6916e-02, -1.0343e-02,
        -2.8975e-01,  2.0009e-01,  1.0557e-02,  2.1503e-03, -2.7072e-02,
        -1.3318e-02,  8.6893e-02,  2.0291e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0540e-01,  1.0151e+01,  3.5849e-01,  6.6882e-03, -1.8064e-01,
         2.0736e-01,  3.0380e-02, -7.7842e-02,  7.7175e-02,  7.1674e-02,
        -6.0373e-03,  6.2678e-02, -1.4712e-01, -4.4470e-02, -7.3713e-02,
         3.6714e-03,  1.3914e-03, -2.4528e-01, -1.6620e-01, -3.4308e-02,
        -1.7259e-01, -1.9531e-01, -1.2797e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1406e+00,  4.6392e+01,  7.8499e-01, -5.9696e-01, -1.0213e+00,
        -1.4675e-01, -2.5004e-01, -8.2764e-01, -5.6119e-02,  8.0116e-02,
         6.1280e-04,  1.9304e-01, -3.4577e-01,  6.7319e-01,  4.1209e-01,
        -4.5953e-01,  3.8443e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1854e+00,  8.5628e+01,  3.3691e+00,  1.8866e+00,  2.6850e+00,
         3.5980e-01,  6.4936e-01, -1.1498e+00, -6.0616e-01, -8.4472e-01,
         5.4504e-01,  1.2678e-01, -1.3910e-01,  8.6784e-01,  3.1593e-01,
         1.7527e+00, -6.0165e-02,  3.5823e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3663e+00,  3.2531e+01, -1.9794e+00, -9.3849e-01, -4.5455e-01,
         8.1296e-03, -4.3044e-01,  6.4530e-01, -1.7689e-01,  3.4785e-01,
        -3.0303e-01, -1.0317e-01, -3.1713e-01, -3.5100e-01, -3.7835e-02,
        -9.3023e-01, -3.9562e-01,  8.0128e-01,  2.4172e-01, -5.2490e-01,
        -5.7818e-01, -2.5741e-01, -3.2304e-01, -3.6882e-01, -1.6538e-01,
         5.1130e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4811e+00,  5.9565e+01,  8.1552e-01,  8.1267e-01,  7.8427e-01,
        -6.8092e-02, -6.4268e-01,  7.1176e-01,  7.5434e-01, -1.1717e+00,
         4.6637e-01, -3.0495e-01,  1.9312e-01,  5.2809e-01, -4.9127e-01,
         1.7666e-01, -6.7471e-01,  2.3887e-01, -1.4303e-01, -4.2004e-01,
         8.7497e-02,  6.0451e-01,  4.1926e-01, -1.4946e-02,  2.1310e-01,
         3.0401e-01, -1.6402e-01,  2.1656e-01,  5.1108e-01,  9.7054e-01,
         8.3805e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3808, 41.3252,  0.6836, -1.0587, -0.1802,  0.9042,  0.6173,  1.1060,
        -0.8956, -0.2132, -0.1514, -0.1384, -0.8910, -0.1794, -0.4944, -0.4917,
         0.4821, -0.7380,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1878e+00, -6.6788e+01,  2.2331e+00,  5.8844e-01,  1.4895e-01,
         1.9816e-03,  5.0612e-01,  7.8264e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 7.4096e-01,  7.2503e+00, -4.3759e-03, -2.6870e-01, -1.8748e-01,
         1.9789e-01, -1.0513e-01,  3.5197e-03, -1.3409e-01, -4.3494e-02,
        -3.9218e-02, -1.3896e-01,  7.6118e-02, -4.8258e-02, -3.4862e-02,
        -2.6759e-03,  1.2560e-03,  1.7902e-02,  4.5143e-02,  1.5619e-01,
         1.7552e-02,  9.9212e-02, -2.6155e-02,  1.6749e-02,  1.0729e-02,
         1.7871e-02, -2.5963e-02,  3.4398e-02, -6.3213e-02,  2.5149e-02,
         8.8818e-02, -2.1129e-02, -6.3890e-03,  2.7978e-02,  3.7504e-02,
         6.4208e-02,  3.9178e-02,  1.2118e-01,  9.0283e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0423e+00, -8.5213e+01,  2.5813e+00,  2.2154e-01, -2.3301e+00,
        -1.7725e+00, -8.9347e-02,  1.6954e-01, -9.1048e-01,  2.4763e+00,
        -5.1380e-01,  9.2150e-02,  3.2385e-01, -8.3912e-03, -9.5450e-01,
        -1.8367e-01,  4.2973e-01,  7.6191e-02, -3.0438e-01, -3.7042e-01,
        -2.4723e+00, -1.7577e+00, -2.5649e-01,  1.4671e-01, -2.2320e-01,
         6.3460e-01, -2.4001e-02,  2.5698e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.1236, -21.3642,   0.9242,  -0.2622,   0.3345,  -0.1334,   0.5837,
          0.3969,  -0.4187,  -0.3333,  -1.4623,   0.4653,  -0.1797,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2082e-02,  2.1020e+01, -1.3523e-01,  1.0832e-01,  2.8284e-01,
         3.1836e-02, -3.1477e-01,  2.1335e-01, -8.3979e-02,  1.1595e-02,
        -1.6487e-01, -2.8418e-01,  2.6382e-02, -8.7557e-02, -2.3603e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3347e-01,  1.1630e+01, -1.4253e+00, -2.6775e-01,  3.2759e-01,
        -3.3785e-01, -2.2499e-01, -2.8416e-01,  1.3160e-01, -8.1787e-02,
         8.8423e-02, -1.4687e-02,  2.8111e-01,  1.0622e-01,  3.5627e-01,
        -8.4309e-03,  5.5755e-03, -1.6973e-02, -4.5470e-03, -1.3309e-01,
        -5.7721e-02, -6.9587e-02,  5.7731e-02,  1.0206e-01,  1.5413e-01,
         4.2807e-02, -2.5870e-01, -1.1180e-01,  4.4193e-02,  2.0456e-02,
         5.0368e-02, -8.5964e-02,  7.9575e-02,  2.7369e-01, -7.0641e-02,
         1.2605e-01, -5.8350e-02, -5.0358e-02, -2.8274e-02, -3.5690e-02,
         5.7974e-03, -1.0877e-01, -1.7861e-02,  9.1019e-02,  1.7266e-01,
         1.9606e-01, -3.8630e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1274e+00,  6.4482e+01,  4.5620e-01, -1.9767e-01, -7.6937e-01,
         3.1020e-01, -1.7439e-01, -6.5071e-01, -1.6890e+00, -1.2386e-01,
         3.2205e-01,  4.3532e-01,  1.6533e-01, -4.2353e-01, -8.2566e-02,
        -2.2530e-01,  1.6204e-01, -7.3837e-01,  1.0833e-01, -9.6902e-02,
         7.1321e-02,  9.2855e-02,  5.9412e-01, -8.3954e-02,  3.3623e-01,
         9.7511e-02,  9.6719e-01,  2.3327e-01, -2.8589e-01,  3.3732e-01,
         8.6614e-01,  1.3002e-02, -1.4694e-01, -1.3053e-02, -1.6469e-02,
         2.0465e-03, -4.2502e-01,  2.6632e-03,  1.8402e-01,  7.7448e-01,
         6.7168e-01, -6.6191e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7919e-02,  6.6876e+00,  1.4988e-01, -2.6370e-01,  3.2167e-02,
         1.8147e-02,  1.6199e-01, -1.1280e-02,  1.0819e-02, -2.1588e-01,
         1.5949e-02,  2.0475e-01, -6.0386e-02, -3.4374e-02,  1.2630e-01,
        -3.0019e-02,  3.4590e-01, -5.4466e-02,  9.2995e-03,  1.0273e-02,
        -3.5604e-03, -5.8901e-02, -3.0207e-02, -1.1566e-01, -2.1600e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0452e-02,  2.0167e+00, -4.6090e-02, -2.3180e-02, -2.0172e-02,
        -6.7664e-03,  3.4530e-02,  4.0114e-03, -1.1048e-02, -1.3022e-03,
        -6.1342e-03, -1.6918e-02,  8.3708e-04, -1.4982e-03, -1.9718e-02,
         3.9261e-02,  8.0064e-03,  4.1520e-02,  1.7784e-02, -4.4454e-02,
         2.2555e-02,  6.5765e-02,  1.7757e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1098e+00,  8.0643e+01,  2.2845e+00,  1.8907e+00, -1.2630e+00,
         1.8490e+00,  6.2662e-01, -6.5602e-02, -1.0390e-02,  3.2154e+00,
        -8.0146e-01, -1.4208e-01,  1.0057e+00,  3.1525e-01, -1.5086e-01,
        -1.7178e-01, -1.2014e+00, -5.4368e-01, -1.0361e-01, -4.8485e-01,
        -1.9860e-02, -7.9334e-01,  1.8323e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3075,  9.8085, -0.0854,  0.1395, -0.0492,  0.1336,  0.1016,  0.2592,
         0.3094, -0.1144,  0.1694, -0.3307, -0.2057,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7118, 54.2905,  2.5739, -2.8235, -1.8095, -0.9082, -1.9655, -0.9798,
         0.2798,  0.8057, -0.4087, -0.1177, -2.0685, -0.1761,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9735e+00, -6.2279e+01, -3.4277e+00,  6.2103e-01, -9.7751e-01,
        -1.2867e+00, -4.1066e-01,  3.6613e-01,  4.7506e-01,  6.9264e-01,
         4.8066e-01,  6.6742e-01,  3.6593e-01, -2.7796e-01,  1.2528e-01,
        -2.7373e-01, -8.0096e-01, -6.2038e-01,  5.2210e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.2332e+00,  5.4124e+01, -2.4477e+00, -8.2962e-02, -3.7750e-01,
         7.5944e-01,  1.5494e-01, -1.1575e-01, -3.3697e-01,  4.5319e-01,
         2.3689e-01,  9.3764e-02,  1.5406e-01,  5.8628e-01,  8.3098e-02,
        -2.4319e+00,  2.4434e-01, -3.6362e-02, -2.7624e-03,  1.0365e+00,
        -6.9438e-01, -2.6595e-01,  1.5175e-01,  8.9437e-02, -9.0667e-01,
        -2.0497e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7480e+00, -7.8645e+01,  1.8767e+00,  2.2836e+00, -7.0861e-01,
        -5.9705e-01, -4.0732e-01,  2.5596e-01, -5.7154e-01,  6.8289e+00,
        -6.6886e-02, -7.2863e-01, -2.3438e+00, -3.1378e+00, -1.5093e+00,
         1.7854e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3588, 28.9207, -0.1474,  0.7755, -0.8325,  1.2006,  1.4164, -0.0733,
        -0.2816,  0.4699,  0.2312, -0.6061,  0.2930,  0.2366, -0.0889, -0.3468,
         1.0355,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4533e+01, -1.0862e+02, -2.7219e+00,  6.0889e-01, -9.1839e-01,
        -5.9126e-02,  4.2011e-02, -3.0945e+00, -4.1461e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6680e-01,  6.2807e+00,  2.8738e-01, -3.8295e-02, -8.4346e-03,
        -1.6989e-01, -1.3603e-01, -1.9379e-01, -1.1301e-01, -1.0946e-01,
         8.5508e-02, -3.5263e-02,  2.3754e-02, -2.2772e-02, -6.8512e-02,
        -1.5116e-02, -4.3594e-02,  1.0496e-02,  3.6096e-02, -7.4389e-03,
         5.2737e-02,  2.3119e-02, -9.3201e-03, -3.8236e-03,  2.5582e-02,
        -3.8766e-02,  1.2565e-01,  1.5816e-02, -2.3583e-02, -4.8782e-02,
        -8.6000e-03,  2.8345e-02, -5.1904e-03, -2.9419e-02,  2.0504e-02,
        -3.4280e-02, -4.0502e-02,  1.9077e-02, -1.1274e-01, -2.9551e-02,
        -1.9995e-02, -1.7666e-02, -1.5099e-01, -2.1883e-02, -1.2270e-02,
         7.6708e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7986e-02,  1.9994e+01,  7.1625e-01, -8.7156e-01,  1.6879e-01,
        -2.6316e-01,  4.7502e-01,  3.1710e-01, -5.9482e-04,  2.2693e-01,
        -9.7473e-02,  1.2464e-02,  1.8442e-01,  1.4544e-02, -9.5686e-02,
         1.7603e-01,  3.0920e-03,  2.2492e-01, -8.3501e-02,  3.0863e-02,
        -1.8860e-01, -1.7497e-01, -9.0371e-02,  7.5282e-03,  2.3680e-02,
        -2.2731e-01,  9.2378e-03, -1.0829e-01, -2.7841e-01,  2.7683e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0625, -7.5409,  0.2741, -0.0259, -0.0937, -0.3404,  0.1493,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6640e-01, -6.5879e+01, -1.4439e+00,  3.7461e-01,  1.1073e+00,
        -2.1773e-01, -9.7894e-01, -1.2171e+00, -9.8240e-01,  1.4620e-02,
         1.8619e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9378e-01,  8.2249e+00,  1.6309e-01, -4.5324e-02, -1.1421e-01,
         2.0495e-01, -9.7264e-04,  1.8019e-02,  5.6947e-02, -1.0307e-02,
        -5.4196e-02,  1.4650e-01,  1.3390e-01, -1.4797e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0072e+00, -7.9801e+01,  2.4454e+00,  8.7603e-01, -4.0183e-01,
        -3.8867e-01, -1.8290e-01, -6.0377e-01,  8.0120e-01,  8.5155e-01,
        -7.3244e-02,  7.6515e-02,  5.8111e-01,  2.0176e-01,  7.2803e-02,
        -3.8112e-01, -2.5668e-01,  2.8856e-01,  1.6841e-01, -9.1699e-01,
        -7.2866e-01, -5.2943e-01, -2.6006e-01,  6.2186e-02,  5.9175e-01,
         9.1434e-01,  1.0099e+00, -4.4670e-01, -1.9308e-01,  3.1805e-01,
         1.1030e+00, -9.2151e-01,  1.1044e+00, -3.4690e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6477e-03,  3.1828e+00,  2.8151e-02,  1.3120e-02,  3.2532e-02,
        -1.6582e-02,  1.3897e-02, -1.5461e-02, -3.3623e-02,  1.2079e-02,
         2.7812e-02,  1.4033e-04, -7.9919e-03,  3.1336e-03,  2.3692e-02,
        -1.9194e-02, -1.7179e-01, -4.8603e-02,  1.0854e-02, -1.6173e-03,
        -7.4589e-03,  1.6909e-02, -9.7931e-03,  5.1962e-03, -1.6074e-02,
         6.9973e-03, -4.8729e-02,  3.8443e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3205e+00,  4.0975e+01,  1.2018e+00, -2.0838e+00, -6.6478e-01,
        -1.8813e-01, -2.5542e-01,  2.0161e-01, -6.5486e-02,  9.9485e-02,
        -2.3147e-01,  2.4679e-02, -2.7383e-01,  1.9111e-01,  9.4085e-01,
         2.2173e-01, -8.5624e-01,  1.5060e+00,  9.0544e-01,  7.8269e-02,
         5.1455e-02,  8.4646e-02,  4.0457e-01,  1.0722e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 9.4470e-01,  3.2284e+01,  2.7920e-01,  1.1320e+00,  1.3140e-01,
        -6.7817e-01,  7.0650e-01,  3.5201e-01,  5.2154e-02, -1.9188e-01,
         2.4626e-01,  4.4869e-01,  4.0073e-01,  1.1367e-01,  1.1143e-01,
        -2.3848e-01, -5.7001e-01, -1.7968e-02,  1.8060e-01,  6.6829e-01,
        -6.4168e-02, -8.2625e-02, -6.7803e-02,  2.3421e-01, -1.6967e-02,
         1.1420e+00,  4.6356e-01,  5.7380e-02, -2.3004e-01,  3.5731e-01,
         1.5064e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.5558, -73.8006,   5.7188,  -2.7312,  -1.2303,   3.5236,  -0.2096,
          0.1399,  -0.1749,   0.3521,   1.1600,   1.8426,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6625e+00,  1.5082e+02,  2.6589e+00,  7.9395e-01,  3.0691e+00,
        -4.3839e-01, -3.1215e+00, -2.2178e+00, -5.1931e-02, -4.0312e+00,
        -1.7205e-01, -3.1501e+00,  3.1677e+00, -1.3149e+00,  2.7749e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4589e+00,  5.4068e+01,  3.9907e+00, -1.7796e+00, -3.1144e-02,
         4.2511e-01, -1.1110e-01,  7.6534e-01,  9.5631e-01,  2.9282e-01,
        -1.8232e-01,  2.3163e-01, -3.6568e-01, -1.3046e-01,  1.2120e-01,
         1.6076e-01, -1.2318e-01,  1.1132e-01,  3.2090e-02,  6.0001e-01,
         2.9855e-01,  5.3107e-02, -2.1400e-01,  6.9348e-02,  3.3007e-01,
         3.3333e-01, -1.8190e-01,  1.6871e-01, -3.2533e-01, -4.0028e-01,
         2.2956e-01, -2.9817e-01,  1.6709e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2084, 65.7283,  5.6281, -2.7772,  0.3757, -0.5487,  1.0009, -0.2324,
        -1.0839,  0.8664,  0.1211, -0.6545, -0.9207, -0.5777, -0.6038, -2.6720,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2773, 27.7982, -2.6485, -0.5541,  1.2374,  0.8927,  0.5306,  0.3834,
         1.1741,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4064,  5.9220,  0.2143,  0.6278, -1.0938,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4307e-01,  1.3720e+01, -4.8499e-02, -1.0555e-01, -7.9204e-02,
         7.7380e-02, -7.0248e-02,  2.8172e-02,  1.4812e-01,  8.8421e-02,
        -4.2646e-01, -7.8913e-02, -1.2817e-01, -1.3989e-01,  7.0793e-03,
        -7.1317e-02,  5.7335e-02,  1.4969e-02,  9.3271e-02,  2.5477e-03,
        -1.1766e-01,  3.4735e-02,  3.9917e-02, -6.4222e-02,  6.6115e-02,
         3.9131e-03,  1.5771e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7065, 49.5565,  2.6663,  1.4548,  2.2892,  1.2002,  0.0700,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0404e+00,  5.8401e+01,  1.9411e+00,  1.1024e+00, -9.0947e-02,
         3.7538e-01, -1.7194e-01, -5.1389e-01, -4.3915e-01, -8.6086e-01,
         2.9107e-01,  1.9463e-01, -2.1297e-01,  1.7459e-02, -4.2266e-01,
         9.0835e-01, -4.0389e-01,  2.0119e+00,  6.0017e-01, -2.4043e+00,
         1.0677e-01,  3.6300e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8135e+00, -5.8990e+01,  1.8289e+00, -6.0329e-02, -5.3803e-01,
         6.5046e-01,  1.3977e+00, -6.3001e-01,  8.0556e-01,  1.3036e+00,
        -7.8064e-01,  3.6664e-02,  3.8878e-01,  3.6835e-01, -1.5268e-01,
         4.9533e-01, -3.4469e-01,  2.3141e-02,  6.2119e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4225e-01,  2.4967e+01,  1.3013e+00, -1.2218e-02,  2.7700e-01,
         2.7478e-01, -5.6983e-02,  1.5986e-01,  2.5888e-01,  5.3610e-02,
         4.6805e-02,  6.5027e-02, -1.1245e-01,  1.0742e-01, -1.6316e-01,
         4.1766e-02,  2.3678e-01,  1.1750e-01,  1.0909e-01, -6.2490e-02,
         3.0322e-01,  3.8686e-01, -6.5331e-02,  3.7809e-03,  8.3091e-02,
        -1.1437e-01, -8.4887e-02,  9.5087e-02,  3.6482e-01, -1.7389e-01,
         4.9791e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.9812e+00, -1.4234e+01, -1.2082e+00,  2.5023e-01, -3.1299e-01,
         4.3309e-02,  1.8478e-01,  2.5510e-02,  1.9407e-02, -1.7157e-01,
        -1.6065e-01, -7.0603e-02,  2.7982e-01, -1.0376e-01, -5.2514e-01,
        -4.7044e-02,  1.9031e-01,  1.7483e-01,  3.9442e-01,  4.5247e-02,
        -6.5343e-02,  1.3837e-02, -9.1029e-02,  1.9514e-01, -4.5673e-01,
        -3.2761e-02,  3.0760e-02,  5.5405e-01,  3.9679e-02, -4.5733e-01,
        -3.2223e-01,  3.2836e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9370e-01, -4.6693e+01, -3.8473e-03,  7.0212e-01, -1.2406e+00,
         3.1039e-01,  1.4052e-01, -8.0089e-01,  4.9967e-01, -2.5904e-01,
         6.1845e-01, -5.6566e-01,  1.6942e-01, -7.7611e-01,  2.3161e-01,
        -1.6983e-01, -2.9077e-01,  2.1194e-01, -2.1232e-01, -5.6579e-01,
        -5.0820e-01, -7.4170e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3792e-02, -5.5836e+01, -2.7254e-01,  7.9935e-01, -1.3566e-02,
        -3.9320e-01,  7.7053e-01, -2.7813e-01,  4.9096e-01,  3.1087e-01,
        -5.9367e-01, -2.2493e-01, -1.1232e-01, -2.7416e-01, -4.7844e-01,
         8.3051e-02, -1.8254e-01, -3.4739e-01,  2.7429e-02, -6.0820e-01,
        -5.8725e-01,  6.7923e-01, -7.6600e-02,  3.0594e-01,  1.7310e-01,
         1.9069e-01, -1.9958e-01,  6.6103e-01, -5.4983e-01,  1.4785e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3981e-01,  3.0187e+01, -9.4872e-01, -7.4496e-01, -1.8410e-01,
        -8.5562e-02,  1.8701e-02, -9.4423e-02, -6.6400e-02, -2.1183e-02,
        -8.5360e-01,  2.5413e-01, -4.8475e-01,  1.0540e-02, -1.9334e-01,
        -2.7005e-01, -2.7232e-01, -1.6058e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8292e-01, -7.1909e+00, -9.7973e-01,  6.9333e-02,  2.3762e-02,
         2.2306e-01,  6.0449e-03, -1.3398e-01,  8.2796e-02,  9.8936e-03,
        -3.1879e-02,  5.3912e-03,  3.8678e-02, -6.1336e-02, -4.4434e-02,
         4.8645e-02, -2.8829e-02, -1.8173e-02,  3.0780e-03,  5.5613e-02,
         1.3635e-02, -2.9754e-02,  3.3923e-02,  1.5643e-02, -1.3006e-02,
         3.1057e-03, -4.1139e-02, -8.2182e-03,  2.4993e-02,  1.4373e-02,
        -8.3616e-03, -3.3317e-02, -2.1930e-02, -6.5074e-03, -1.6566e-02,
         7.2446e-02, -6.2946e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2541, 37.3982, -9.0211, -0.8145,  0.3148,  1.5001,  0.4718,  1.7078,
         0.2060,  0.6376, -0.2025, -0.1063,  0.2483,  2.3001,  0.2005,  0.5031,
         0.8907,  1.3806, -0.1718, -0.2163,  0.2712, -0.0612, -2.0176,  0.8915,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5932e-01,  1.5955e+01, -1.3059e-01,  2.0806e-01,  1.5341e-01,
         3.0880e-02, -4.5368e-01,  1.2035e-01,  1.9869e-01, -2.2063e-02,
         7.3705e-02, -2.4092e-01, -1.6438e-01, -4.9505e-02, -5.3502e-02,
        -3.9229e-02, -2.1545e-01, -4.0463e-02, -1.1970e-02, -1.1064e-01,
        -4.5871e-02,  5.6715e-02, -6.2391e-02,  2.4489e-02,  7.7012e-02,
         4.6707e-01, -9.3018e-02,  7.7342e-02,  7.0318e-02, -4.7380e-02,
         2.6525e-01, -7.0002e-02, -3.2658e-02,  1.4033e-01,  1.3890e-01,
        -5.8494e-03, -4.3996e-02, -6.1946e-02,  4.0446e-03, -6.6672e-03,
        -9.4108e-02, -1.0494e-01, -4.4461e-02,  2.6290e-01, -8.8899e-02,
         1.5073e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5144, 15.3496,  0.4893,  0.1598,  0.3214,  0.2559, -0.2965, -0.0365,
        -0.0463, -0.2150,  0.1962, -0.3169,  0.1507, -0.1962,  0.1549,  0.1220,
        -0.1837, -0.3431,  0.1833, -0.0324,  0.2454, -0.0483, -0.6539,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1396, 72.8826, -3.0689,  1.7040, -2.3406,  0.6638,  0.2892,  0.1399,
        -0.8094, -3.4952, -4.2999,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5309e+00, -6.4379e+01,  8.2841e-01,  2.4980e+00, -1.3210e+00,
         1.3951e-01,  8.7798e-01, -7.8868e-01, -1.5187e+00, -4.7657e-03,
        -3.3373e-01,  6.0576e-01,  7.8341e-01,  4.7547e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.9383, -43.8720,  -0.1380,  -1.0971,   0.8839,   0.4130,   0.7620,
          0.3212,   0.2169,  -0.6024,  -0.2873,   0.4494,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6003e+00,  1.0277e+02,  9.5170e+00,  4.3162e+00,  5.2167e-01,
        -5.0671e-01, -2.0160e+00,  5.3654e-03,  5.0696e-01,  3.6545e-01,
        -4.3067e-01,  3.0432e-01,  1.1337e+00,  1.1767e-01, -1.7174e-01,
        -5.7197e-02,  1.2359e-01,  1.3422e-01,  2.6955e-01, -6.6511e-01,
        -1.5859e-01,  1.3163e-01,  3.4885e-01, -4.3927e-01,  5.9864e-01,
        -7.0475e-01,  1.9568e-01, -1.4079e-01,  7.5567e-02,  2.2777e-01,
         5.7043e-02,  6.2368e-01,  6.7233e-01,  4.5119e-01,  1.1648e+00,
         2.4862e-01,  5.3973e-01, -6.1174e-01, -1.1152e-01, -8.8288e-02,
         5.7769e-01,  3.1224e-01, -1.7972e-01, -5.1032e-02, -2.9263e-02,
         4.3066e-01, -2.9577e-02, -1.4627e-01,  1.3095e-01, -2.0414e-01,
         3.8631e-02, -7.6551e-03, -3.5407e-01,  1.6520e+00,  8.9700e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.5251e+00,  3.7666e+01, -3.6881e-01, -2.1464e-01, -3.4648e-01,
        -3.0170e-01,  3.3219e-02,  9.9605e-02,  8.8100e-02,  2.0449e-01,
         1.6326e-01, -1.7362e-01,  9.5390e-01, -2.0859e-01, -1.1547e-02,
         5.7584e-02, -3.9545e-02,  1.5351e-01,  8.0976e-03,  4.7393e-01,
        -1.6858e-01, -2.9354e-01, -2.2550e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0296e+00,  5.7691e+01, -8.1004e-01,  1.1790e+00, -2.9713e-02,
        -1.0325e+00,  1.9828e-02, -3.3107e-01,  8.3053e-01,  1.8689e+00,
        -2.4938e-01,  5.8235e-01, -8.8213e-01, -6.7623e-01, -8.2947e-01,
         1.4959e-01,  3.0923e-01,  1.5568e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5103e-01, -4.6828e+01,  1.0753e+00,  4.5793e-01, -1.1559e+00,
         5.9228e-01,  3.5170e-01,  1.4495e-01,  6.3040e-01,  6.3697e-01,
         1.4297e-01, -5.1078e-01, -2.6473e-02,  7.5518e-01,  3.2756e-01,
         1.2440e-01,  5.3427e-01,  5.9237e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4027e-01, -8.6696e+00,  4.3460e-01,  2.5632e-01, -1.4060e-01,
         1.5082e-01,  1.7335e-01, -9.1530e-02,  9.6883e-02, -1.0697e-03,
         5.3406e-02, -7.4795e-02, -1.7847e-01,  2.8328e-02, -3.8425e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9990e-01,  1.2169e+01, -2.2724e-01,  4.5960e-01, -5.7251e-02,
         1.0567e-01,  1.0421e-01,  2.1953e-01,  2.8749e-01,  2.6630e-01,
         2.2863e-01,  1.9674e-01,  1.0687e-01, -2.4485e-02, -4.6666e-02,
         7.4407e-02, -2.9353e-02,  1.2055e-01,  5.1278e-02,  1.0460e-01,
        -1.5572e-02,  1.2435e-01,  4.3039e-02, -3.4505e-03, -1.3487e-02,
         9.4684e-02,  2.9992e-01,  6.5165e-06,  1.3960e-01,  3.6220e-01,
        -7.0955e-02, -1.9497e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5628e-01, -8.5113e+01, -4.6653e+00, -8.7628e-01, -8.0474e-01,
        -2.9504e-01,  1.4461e+00,  4.1095e-02, -9.0523e-01, -5.0484e-01,
         9.3555e-02,  2.2298e-01, -4.1601e-01, -5.1104e-01, -1.8024e-01,
         3.2351e-02,  1.2438e-01,  7.6804e-01,  5.9444e-01,  3.2916e-01,
         1.2025e+00, -4.4601e-01, -8.0910e-01,  2.9352e-01,  7.6377e-02,
         3.0589e-01,  2.1473e-01, -5.2071e-01,  3.3920e-01,  6.5670e-01,
        -1.2751e+00, -1.9138e+00, -1.4281e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5816e-01,  4.0391e+01,  1.6719e+00,  1.5754e-01,  1.3747e-02,
         1.2067e-01, -5.5976e-02, -4.8323e-01,  1.6068e-01,  7.4811e-02,
        -2.7878e-01,  1.8562e-01, -5.5623e-03,  1.2839e-01,  2.7226e-01,
        -2.7929e-02,  1.5644e-01, -4.2561e-02, -1.1163e-01,  2.4635e-02,
        -8.3226e-02, -1.8660e-01, -2.2402e-02, -7.0132e-02,  1.1062e-01,
        -5.2821e-02, -2.9672e-02, -3.6806e-01,  1.4678e-01,  1.3509e-02,
         5.9726e-02,  2.6242e-01,  3.2933e-02,  2.1758e-01, -2.2448e-02,
        -5.8717e-01, -9.0679e-02, -1.4407e-01,  4.9331e-02,  1.1267e-01,
         2.1707e-02,  7.5359e-02,  7.6878e-02,  2.4685e-01, -1.9042e-02,
         1.0583e-01,  3.2984e-02,  8.8307e-02,  1.8904e-01,  8.0389e-02,
        -1.5797e-01,  1.2380e-01,  1.4515e-01,  1.6439e-01,  2.3187e-01,
         5.5942e-03,  3.9984e-01, -8.8901e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8798e-01,  9.2092e+00,  2.5611e-01, -2.9580e-01, -9.6429e-02,
        -3.9295e-02, -3.6109e-02, -1.0584e-01, -7.1558e-02,  1.5282e-01,
        -1.7570e-01,  4.3101e-02, -1.8065e-01,  1.2390e-01,  3.6749e-02,
        -4.5100e-02,  6.3130e-02,  2.5606e-03, -1.3071e-02,  5.4851e-02,
         9.8689e-03, -1.5925e-02, -1.2121e-01, -1.3843e-01, -3.2113e-01,
        -4.4400e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5655e+00,  8.1299e+01, -2.0820e+00, -1.9570e+00, -1.7398e-01,
         3.6179e+00, -8.2349e-01, -5.5970e-01, -1.1807e+00, -5.5895e-01,
         1.1835e+00,  3.0299e-01,  3.9410e-01, -2.8848e-01,  6.8836e-02,
        -2.3110e-01, -7.0122e-01, -3.0335e-01,  5.3566e-01,  5.1656e-01,
        -3.8006e-01, -9.3593e-01, -3.3618e-01, -5.6997e-01, -4.8025e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4495e-01,  8.1006e+00, -2.8661e-01,  1.8618e-01,  1.8186e-01,
         5.2494e-03, -5.3666e-02,  6.5385e-03, -8.7327e-02,  9.9318e-02,
        -7.1112e-02, -5.3734e-02,  1.3817e-01,  3.1432e-02,  6.3512e-04,
        -2.4608e-02,  3.7439e-02,  6.2040e-02,  3.6727e-03, -2.9063e-02,
        -2.2783e-02,  3.2684e-03,  3.5412e-03, -4.1930e-02,  2.2788e-02,
        -3.0247e-02, -1.0772e-01,  3.7979e-03,  2.5523e-02,  1.7043e-03,
         1.4238e-02, -2.8837e-02,  1.1295e-01,  3.3772e-02,  1.4975e-01,
        -2.9134e-02,  2.4838e-02,  4.2530e-03, -1.7122e-02,  2.9596e-02,
        -5.7003e-02,  3.1694e-02,  7.0990e-02,  9.8013e-02, -5.2151e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9830e-01,  8.0643e+00, -6.8682e-01,  2.4858e-01, -1.5159e-01,
        -2.1773e-01, -1.5403e-01,  2.1184e-02, -1.7875e-01,  6.5408e-02,
        -8.5059e-02, -9.2160e-02, -5.7434e-02,  9.0033e-03, -1.5491e-02,
        -2.8693e-03, -8.7361e-02, -5.5599e-02,  2.4590e-02,  1.6433e-02,
        -2.6466e-02, -3.0499e-02,  1.9036e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0978,  0.9403, -0.0522, -0.0225, -0.0140,  0.0010,  0.0132, -0.0151,
         0.0040, -0.0288,  0.0230,  0.0276,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.9195e-02, -1.0601e+02, -6.0664e+00,  1.7076e+00, -5.5218e-02,
        -4.9215e+00,  1.0478e+00,  7.2808e-01, -2.0223e+00, -1.2509e-01,
         4.8894e+00,  3.9716e-01,  5.8780e-01,  6.2050e-02, -7.7708e-01,
         2.2671e-01, -3.4304e-01, -8.6279e-01,  3.0063e-01,  1.1553e+00,
         4.1682e-01, -5.7079e-01,  4.5538e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3994e+00, -8.7587e+01,  3.3971e+00,  2.1754e+00,  1.3617e+00,
         1.9349e+00,  1.6307e+00,  1.2875e+00,  1.5354e+00, -1.5770e-01,
         1.0992e+00,  6.6467e-01,  7.1930e-02, -1.5268e-01,  5.6643e-01,
         4.5834e-01,  1.1253e+00,  5.5839e-01, -2.6940e-01,  7.4199e-02,
        -6.4594e-01, -9.8829e-01, -6.2576e-01,  5.2702e-01,  8.9785e-01,
        -5.8327e-01,  1.0817e+00, -3.2728e-02,  6.4436e-01, -6.6119e-02,
         4.6537e-01,  8.6921e-01, -2.2792e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8570e+00,  5.7400e+01,  2.0573e+00, -9.7093e-01,  4.9362e-01,
         7.9160e-02, -5.7560e-01, -1.5845e-01,  3.9179e-01, -2.3260e-01,
         2.6286e-01, -1.3969e-02,  5.8321e-01,  2.2647e-01,  3.5632e-01,
        -8.5163e-01, -5.0992e-02,  6.3053e-02, -5.3100e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6217, 39.8627, -1.0557,  1.0542, -0.8308,  0.6274,  0.1655, -0.0473,
         0.1599,  1.2055,  0.6871,  0.8388,  1.1708, -0.0657,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2578e-01, -6.2668e+01, -7.3369e-01,  2.4350e-01,  1.7968e+00,
        -2.9229e-01,  3.8521e-01,  7.4974e-01, -6.9978e-01,  1.0660e-01,
         1.9233e-01,  5.6689e-01, -5.3364e-02,  2.2652e-01, -9.5126e-02,
         6.1177e-01,  4.0911e-03,  2.6758e-01, -1.4510e-01,  3.3660e-01,
         1.5875e-01,  6.6042e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5759e+00,  6.1293e+01,  4.1407e-01,  1.0177e+00,  7.8084e-01,
        -4.4159e-01, -6.3498e-02, -1.1399e+00,  2.1504e-01, -1.4412e-01,
        -1.5953e-01, -5.7148e-01, -4.4091e-01,  1.5921e-02, -8.7271e-02,
         3.8605e-01,  5.7571e-01,  2.6377e-01,  1.7176e-01,  3.5033e-01,
         1.3588e-01,  3.2337e-01, -1.8043e-01,  5.9662e-02, -2.6061e-01,
        -4.2091e-03,  1.8590e-01,  3.4308e-01,  2.9835e-01,  1.7403e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5254e-01,  6.7483e+00, -5.8685e-01,  7.5686e-02,  3.5199e-03,
         1.1438e-01, -2.3878e-02,  1.7834e-01, -7.3530e-02,  5.8543e-02,
        -2.3528e-01, -5.8430e-02, -9.1978e-02,  2.9230e-01,  1.0539e-01,
        -6.0318e-02, -4.3540e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7039e-02,  7.0296e+01, -1.4483e+00,  6.3739e-02, -4.1363e-01,
         9.2516e-01, -2.7418e-01,  8.3750e-01,  3.6793e-01,  2.0875e+00,
        -1.4055e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8003e-01, -1.3536e+01, -8.3111e-01,  1.6362e-01,  2.6090e-01,
         5.0747e-01, -1.5807e-01,  1.0214e-01, -5.5495e-02, -1.1494e-01,
         7.4839e-02, -2.8355e-02, -1.1332e-01, -4.0377e-01, -4.4527e-01,
         1.0145e-02,  4.3453e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5119e-01,  1.5597e+01, -2.1874e-01,  2.0908e-01, -1.0203e-01,
        -1.3943e-01, -3.3745e-02, -3.6292e-02, -5.4594e-02, -2.2143e-01,
        -1.5260e-02,  2.8408e-02, -1.0914e-01, -1.6744e-01, -1.9642e-01,
         1.1740e-02, -2.7831e-02, -9.6534e-02, -4.4661e-01,  5.4556e-02,
        -2.5982e-02, -5.4801e-02,  2.1396e-01, -1.4021e-03,  1.6909e-01,
        -2.7241e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7185e-01,  7.0843e+00, -2.7276e-02, -1.2287e-01, -1.6184e-01,
         1.6998e-01,  2.1199e-02,  1.0172e-01,  1.7400e-01, -4.1097e-02,
         1.0332e-02, -1.2403e-03, -3.0160e-02,  3.7656e-02, -1.7032e-02,
         3.3971e-03, -4.8118e-03, -3.0972e-03, -3.9086e-02,  5.3341e-03,
        -1.5495e-02, -9.9237e-02, -6.9431e-02, -2.5707e-02, -9.1323e-02,
        -2.3215e-02, -5.2212e-02, -4.4525e-02, -1.0876e-01,  1.0879e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6253e+00,  9.0571e+01,  2.0037e+00, -3.1587e+00,  1.9327e+00,
        -1.4718e+00,  3.5828e-01,  6.8172e-02,  4.2268e-02, -1.3763e+00,
        -9.8308e-01, -1.0780e+00,  8.3283e-01,  6.7515e-01, -6.0517e-01,
         5.8354e-02,  5.8811e-01,  3.7148e-01, -7.7877e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 9.7182e-01,  8.0788e+01,  7.3428e-01,  1.3958e+00,  1.0892e-01,
         2.9694e-01,  2.6780e+00, -3.7823e-01,  1.0841e+00,  8.0236e-01,
         1.0071e+00, -6.9021e-01, -1.0870e-01,  5.5572e-01, -6.5669e-01,
         1.1382e-01,  4.7218e-01,  6.6741e-01, -3.5005e-01,  3.7547e-01,
        -5.6856e-02, -5.7626e-01,  5.4848e-01,  1.0394e+00, -1.0400e+00,
        -3.2119e+00,  1.3420e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0680e+00,  9.4990e+01, -4.2579e-01, -2.5138e+00, -5.6462e-01,
         1.6764e+00, -2.9617e-01,  8.1746e-02, -6.6935e-01, -6.5800e-01,
        -1.4645e+00,  7.4761e-01, -4.6311e-01, -1.0265e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1091e-01, -3.8288e+01, -2.7517e-01, -1.1287e-01, -9.5586e-02,
        -7.2724e-01,  3.2866e-03,  5.4231e-01, -1.2040e+00, -2.4017e-01,
        -2.6924e-01, -3.4164e-01, -2.9350e-01,  1.2174e-01, -7.3773e-01,
         1.0768e-01, -2.3259e-03,  1.4296e-01, -1.2854e-01, -3.6560e-01,
         1.0505e-01,  3.5230e-01,  8.1206e-02,  8.1734e-02, -2.6703e-01,
         2.0640e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0570,  4.5878,  0.0551, -0.1320,  0.0515, -0.2997,  0.0620, -0.0464,
         0.0190,  0.0401, -0.0166,  0.2706,  0.0476, -0.1338,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1039e+00,  8.1755e+01,  2.0995e+00,  4.4376e-01, -5.7180e-01,
        -3.3700e-01, -2.6763e-01, -8.1195e-01,  1.0540e-01, -1.7589e-01,
        -2.0843e-01, -1.0205e-01, -1.1804e-01,  2.2219e-02,  2.1124e-01,
         1.8463e-01,  3.8873e-01, -6.5680e-03,  4.6574e-02,  2.7747e-01,
         1.5283e-01, -4.1033e-02, -9.5914e-02,  1.5346e-01,  2.9458e-01,
        -5.5289e-01, -3.3778e-01,  7.7138e-01, -1.2419e-01, -3.3282e-01,
        -1.4923e-01,  1.3391e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8179e+00, -9.4441e+01,  3.1501e-01, -7.5520e-01, -3.8339e+00,
        -4.1731e+00,  4.3336e-01, -2.0849e-01, -7.7691e-01,  5.2352e-01,
        -7.7963e-02,  3.1138e-01,  6.9140e-01, -4.1994e-01,  4.2216e-01,
        -9.3847e-01,  4.6901e-01,  3.2977e-01, -3.1793e-01, -4.1473e-01,
         3.7902e-01,  3.8901e-01,  9.6870e-01, -3.4523e-01, -5.6605e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5329,  6.8823, -0.3041,  0.4686,  0.2653,  0.6230,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6898e-01,  1.6943e+01,  1.0049e+00, -1.0824e-01,  2.5485e-01,
        -2.7479e-01,  9.1633e-02, -7.5252e-02, -4.1001e-02, -1.7554e-01,
        -2.6388e-02,  4.4825e-02, -9.7680e-02,  4.8521e-02, -1.9432e-02,
        -1.1028e-02, -6.4380e-02, -3.9024e-02,  1.0302e-02, -6.5331e-02,
        -2.4389e-02, -5.2245e-02, -9.1720e-02,  4.4701e-02,  3.2232e-03,
        -3.3024e-02, -4.1206e-02, -5.6686e-02, -4.2278e-03,  1.4180e-01,
         3.1227e-02, -4.3039e-02,  9.5477e-03, -2.4469e-03, -7.2945e-02,
        -9.6034e-02,  4.3361e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1359, -9.3791, -0.3926, -0.1071, -0.1904, -0.1017, -0.1547, -0.0276,
        -0.0258,  0.0906,  0.0377, -0.0140,  0.0652,  0.0479, -0.0525,  0.0782,
         0.1170, -0.1829, -0.0643, -0.0103,  0.0373, -0.0421,  0.0295,  0.0404,
        -0.1168, -0.0115, -0.0343, -0.0566, -0.0294, -0.0239, -0.0182, -0.0386,
         0.0191, -0.1711,  0.1473,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2593e-01,  1.3062e+01, -2.6043e+00, -1.6125e-01, -1.0557e-03,
         7.7714e-02,  6.9809e-02,  1.9584e-01, -2.9916e-01,  3.3633e-01,
         1.1649e-02, -4.1977e-01, -3.6056e-01, -3.0050e-01, -9.6077e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1664e+00, -8.6068e+01,  9.0403e-01,  3.1886e+00, -9.4421e-02,
        -5.6180e-01, -5.1048e-01, -1.6228e+00, -1.0150e+00,  7.3573e-03,
        -5.0979e-02,  1.0047e+00,  6.3090e-01,  6.3788e-01, -1.6288e-01,
        -3.1882e-02, -3.2868e-01,  7.6825e-01, -1.5619e-01,  3.2720e-01,
         6.0720e-01,  2.4927e-01,  3.7106e-01,  8.8910e-01, -5.4811e-01,
         6.7210e-01, -6.2721e-02, -1.6355e-01,  6.0358e-01,  6.5369e-02,
         1.5683e-01,  1.0442e+00, -2.3691e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1006e+00,  1.1887e+02, -2.1434e+00, -5.7389e-01, -8.7727e-01,
         2.6901e+00, -3.5148e-01,  8.2684e-01, -2.0935e+00,  1.5804e-01,
         1.1259e+00,  6.3843e-01, -6.9390e-01, -2.2769e+00,  5.9136e-03,
        -5.3360e-01, -4.5055e-02,  6.9337e-01,  6.3123e-01, -3.5139e-01,
        -1.0769e+00, -4.6010e-01,  7.9556e-02, -1.1292e-01,  5.0614e-01,
        -3.8772e-01, -1.9138e+00, -1.3250e+00,  8.8485e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 6.8056e-01,  3.1314e+01,  1.0921e+00, -1.0388e+00,  3.0586e-02,
        -7.2496e-01, -1.9650e-01,  1.2822e-01,  1.3587e-01, -3.1756e-01,
        -1.1976e-01, -6.0404e-02,  2.2361e-01, -6.6186e-02, -6.0794e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2209e+00,  7.9554e+01, -1.5593e+00, -2.3243e+00, -1.2852e-01,
         3.9994e-01, -9.4719e-01, -3.1088e-01,  4.4905e-01,  1.1137e-01,
        -1.9747e-01, -5.8202e-01, -2.2012e-02, -5.1089e-01,  2.2908e-02,
        -3.8890e-01, -6.2371e-01, -1.2260e-01, -7.4813e-02, -5.3126e-01,
        -6.3053e-02, -2.0422e-01, -4.4598e-02,  5.3001e-02,  5.5211e-02,
         4.5977e-01, -5.1360e-02,  5.4692e-01, -1.0006e-01, -1.6055e-01,
         5.3518e-02, -6.4364e-02,  2.5697e-01,  1.9966e-01, -6.0024e-01,
         3.6668e-01, -2.7048e-01,  5.6855e-02,  7.5115e-02, -5.8963e-01,
        -7.5014e-02, -1.3808e-01,  7.5586e-02, -2.1457e-01,  1.6995e-01,
        -4.1194e-01,  1.8123e-01, -8.3351e-01, -2.0553e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9776e-01,  5.6790e+00, -1.1298e-01, -9.6149e-02, -5.6777e-02,
        -3.1996e-02, -5.2759e-03, -1.5010e-03,  1.8912e-02, -1.1651e-03,
        -2.3754e-03, -2.9709e-02, -5.0037e-02, -9.7000e-03, -3.0450e-03,
        -1.0494e-02, -2.4240e-02,  1.4571e-02,  2.3612e-02, -7.0349e-02,
        -4.8220e-02, -2.0391e-02, -3.0774e-02, -1.3394e-02, -2.3304e-02,
         3.4935e-03, -1.6125e-02,  2.2641e-02,  3.7329e-02, -7.3513e-03,
         1.3445e-02,  5.0623e-04, -2.9377e-02,  7.5667e-02,  1.0627e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7761e+00, -3.0985e+01, -8.7823e-01, -1.2950e-01, -6.5149e-01,
        -3.2417e-01,  1.4002e-01, -2.3350e-01, -3.5210e-01, -3.0075e-01,
        -1.8769e-01, -3.8305e-01, -1.9783e-02, -9.3820e-02, -1.1383e-01,
        -3.9428e-03, -2.3919e-01, -6.3281e-01, -3.7447e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6045e+00, -1.0730e+02,  5.5022e+00, -2.7322e+00, -1.0370e+00,
        -3.9478e-01, -3.0009e+00, -1.2401e-01, -4.1615e-01,  7.9443e-02,
         1.4531e+00, -7.9192e-02,  1.3698e+00,  2.7709e+00, -1.5060e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3717,  7.8585,  0.4024, -0.0816,  0.1174, -0.1218, -0.1418,  0.0566,
         0.1120,  0.0303,  0.0367, -0.0320, -0.0915,  0.0506,  0.0996, -0.0503,
        -0.0132,  0.0080, -0.0185,  0.1884, -0.1020, -0.3980,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0898,  4.4460, -0.0553, -0.0824,  0.0106, -0.0084, -0.0578, -0.0137,
        -0.0906, -0.0125, -0.0892, -0.0338,  0.0178,  0.0050, -0.0229,  0.0046,
        -0.0182, -0.0111, -0.0176, -0.0223,  0.0457, -0.0064,  0.0053,  0.0088,
         0.0129,  0.0113,  0.0314, -0.0377,  0.0364, -0.1714,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8512e+00,  3.0131e+01, -1.9387e+00,  3.6140e-01,  1.3217e+00,
         1.3729e-02, -8.1462e-02, -2.2488e-01,  7.1349e-01, -4.7299e-01,
        -9.5213e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6747e-01, -9.3023e+00, -1.2278e-01, -6.5071e-02,  1.1091e-01,
         2.5233e-02, -1.7535e-02,  2.8321e-02,  2.5720e-02, -7.5405e-02,
         7.6530e-02,  3.1669e-03,  1.3223e-02, -3.2809e-03,  3.4954e-02,
        -1.4609e-03, -4.8692e-03,  4.1538e-02,  3.2311e-02,  1.1901e-01,
        -2.5452e-03, -5.0646e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0219e+00,  2.4580e+01,  1.7078e+00,  4.4439e-01,  2.4165e-01,
         1.2443e-01,  3.9174e-01,  6.2962e-02, -2.3071e-01, -9.9104e-02,
        -5.4148e-02, -2.3075e-01,  6.4438e-02, -4.4611e-02, -1.5683e-01,
         2.7546e-02, -8.8810e-02,  2.6240e-01,  2.9980e-01,  4.7088e-02,
        -5.2089e-02, -9.4538e-02, -2.7256e-01, -1.6309e-01, -2.2638e-01,
        -2.7609e-01,  2.1869e-01,  2.2779e-01,  1.5471e-02, -2.0578e-01,
         9.1017e-02,  3.3219e-01,  3.6534e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5962e+00, -4.9389e+01, -2.0127e+00,  3.3826e-01, -7.8673e-01,
        -6.0119e-01, -7.4188e-01, -2.8129e-01, -4.9853e-01,  4.1270e-01,
         1.6203e-01,  3.4292e-01,  1.4133e-01,  1.6810e-01,  7.4945e-01,
        -1.0913e+00, -6.1836e-01,  5.4045e-02,  1.9177e-01, -3.0175e-02,
        -5.0371e-01,  2.2350e-01,  3.5009e-01,  3.5467e-01,  1.4237e+00,
        -9.7191e-01, -2.7578e+00,  1.5795e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1651e-01, -1.2403e+01, -6.8484e-01,  7.2097e-02,  1.9513e-01,
        -7.3452e-02, -3.8140e-02, -4.3304e-02,  3.2755e-01, -2.7485e-02,
         2.7281e-01, -3.2237e-02,  7.1189e-02, -4.5037e-02,  4.5735e-02,
         4.5792e-02,  7.2496e-02,  1.9483e-02,  2.3816e-03, -1.7952e-02,
         1.3599e-01, -1.6539e-02,  1.9871e-01,  9.8232e-02,  6.0863e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-6.8234e-01,  3.7708e+01,  2.4155e-01, -7.9775e-01,  2.6290e-01,
         1.4730e+00, -2.1880e-02, -4.2903e-01, -4.4067e-02, -7.6346e-02,
        -2.8037e-01, -1.7423e-01, -4.7258e-01, -6.3551e-01, -1.1127e-01,
        -1.4117e+00, -1.7841e-01, -2.3709e-01,  1.1471e-01, -3.8808e-01,
         9.0852e-02,  7.7342e-03, -9.8208e-02,  2.4743e-01, -2.1324e-01,
        -4.1134e-01,  3.9994e-01, -8.7489e-02,  2.1529e-02, -1.0099e-01,
         2.4980e-01, -1.2864e-01, -3.7848e-01,  1.5258e-01, -1.9081e-01,
         1.3104e-01, -3.2460e-02, -5.3839e-02, -4.3965e-01, -1.7242e-01,
         2.1736e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0554e+00,  8.3725e+01,  8.1504e+00, -6.1495e-01,  4.7513e+00,
         1.5467e-01,  3.8230e-02,  1.2934e+00, -5.8458e-01, -2.4437e-02,
         6.0609e-01, -2.0331e+00,  5.7679e-01,  8.4641e-02,  3.1153e-01,
        -1.1946e+00, -2.1038e-01, -4.3054e-01,  7.7709e-02, -5.5452e-01,
        -3.2522e-01, -1.1086e+00, -1.9684e-01,  1.0377e+00, -6.3629e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7802e+00, -4.7843e+01, -2.1727e+00,  1.0384e+00,  5.2572e-02,
         5.9312e-02,  5.6459e-01, -7.8344e-01,  5.2551e-01,  8.1036e-01,
        -1.5735e-01, -2.3765e-02,  3.6277e-01,  6.2342e-02,  2.5659e-01,
        -1.1888e-01,  1.8559e-02, -1.6471e-01,  5.0399e-01,  1.8389e-01,
         7.2113e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8657, 34.0682,  2.0294, -0.6906, -0.9080, -0.6227, -0.7081, -0.4371,
        -0.1159,  0.2558, -0.0804, -0.4275, -0.0712, -0.3890, -0.4444, -0.0727,
        -0.5516,  0.1150,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4244e-02,  4.9256e+00,  9.1013e-02,  1.6146e-01,  7.4750e-02,
         9.6388e-02, -5.7857e-02,  5.0225e-04, -2.0858e-02,  2.0125e-02,
         2.4047e-02, -6.6467e-02, -3.9234e-02, -1.9814e-02,  1.5533e-02,
        -1.5198e-02, -8.7402e-02, -3.5479e-02,  1.4866e-02,  2.0050e-02,
         4.0470e-03,  1.7904e-02,  1.1305e-02, -4.8777e-02,  2.3537e-02,
         1.2238e-02, -3.9938e-02,  2.6897e-02,  1.1319e-02, -1.4785e-03,
         1.0816e-02, -6.8810e-03, -2.4071e-02,  8.2961e-04, -6.7297e-02,
         2.9023e-02, -1.4217e-03,  1.0037e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4589e-01,  4.2639e+01, -3.3674e+00, -8.5513e-01,  6.5575e-01,
        -1.1345e-02, -1.0652e-01,  1.5761e-01,  1.2910e-01,  7.8722e-01,
         3.2017e-01, -6.4754e-01,  7.3864e-02, -1.0797e-01, -9.7222e-02,
         2.1756e-01, -2.2525e-02, -3.0929e-01,  5.9534e-02, -1.4315e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9688e+00, -6.9808e+01, -2.9668e+00, -1.5175e-01, -1.1506e+00,
        -2.2828e+00, -4.4515e-01, -6.5542e-01,  2.8826e-01, -6.3104e-02,
        -1.2656e+00,  6.2663e-01,  2.2824e+00, -4.3324e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2878e+00,  4.4199e+01, -3.1358e+00, -2.9137e-01,  1.1198e+00,
        -3.2148e-01, -2.2815e+00, -6.3842e-01,  4.6819e-01, -3.8628e-01,
        -1.8555e-01, -1.3219e+00, -3.5844e-01, -5.9423e-01, -3.3118e-01,
         1.3014e-02,  2.5441e-01,  2.8655e-01, -5.6477e-02, -2.1786e-01,
        -5.1104e-01, -2.3837e-01,  3.9554e-02,  4.5027e-01,  3.7804e-02,
        -2.1334e-01,  3.6901e-02,  2.0594e-01, -5.3215e-01,  4.5627e-01,
        -3.5400e-01,  7.0104e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9735e+00, -8.1707e+01, -2.8961e+00, -3.7207e+00, -2.5738e+00,
         1.4730e+00, -2.0550e+00,  1.3803e+00, -3.0435e-01, -5.5358e-01,
         2.5267e-01,  7.0744e-01, -7.3382e-02, -2.5928e-01, -6.0945e-01,
        -9.0273e-01,  1.8397e-01, -7.4867e-01,  1.6990e+00,  3.9547e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7762e-01,  9.9498e+00, -1.9193e-01, -4.5288e-01, -3.0899e-01,
        -3.6689e-02, -8.0419e-02, -9.7788e-02, -2.2509e-01,  1.1413e-01,
        -8.2217e-02,  4.6041e-03, -6.8420e-02, -3.9057e-03, -2.1446e-02,
         1.8409e-02, -7.2832e-02, -2.9738e-01, -4.8809e-02, -2.3533e-02,
        -1.0798e-01,  3.0380e-02, -6.2392e-02,  2.5827e-02, -3.8391e-02,
        -2.9623e-02, -1.3851e-01, -1.0982e-01, -3.8341e-02, -1.5192e-02,
         9.6469e-02,  1.0215e-02,  1.3418e-01,  3.7547e-02,  1.6008e-02,
         1.4332e-01,  3.4439e-02, -4.4086e-02, -4.1283e-02,  1.7710e-02,
        -2.4887e-02,  5.3792e-02,  1.5145e-01,  1.7550e-02,  5.7029e-02,
         7.6588e-02,  1.7200e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5824, 34.4304,  0.9550, -0.7896,  0.9868,  0.5309,  0.1562,  0.1342,
         1.0771, -0.7044, -0.9604,  0.3492,  0.0908, -0.1867, -1.5426,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4129e-02,  4.7764e+00,  6.7822e-02, -1.9857e-01,  8.7439e-02,
        -1.1943e-01,  3.5764e-02,  4.3172e-02,  4.8565e-02,  7.9523e-03,
         5.6541e-02, -8.8365e-02,  4.4844e-03, -1.5103e-02, -1.9113e-02,
        -3.3337e-02,  1.3765e-02,  2.7630e-02,  1.7615e-02, -7.3498e-03,
         1.9437e-02, -3.3883e-02, -1.6754e-02,  9.3338e-02, -1.5578e-02,
         5.9966e-03,  5.8471e-02, -1.1176e-02, -3.7133e-02,  3.7027e-02,
         1.4033e-02, -1.3900e-02,  4.8434e-03,  6.5744e-03,  4.6235e-02,
         8.4252e-03,  2.3005e-03, -2.3579e-03, -9.9271e-02, -1.0769e-02,
         3.0254e-02, -4.5871e-03, -2.6566e-02,  8.8604e-04, -2.6020e-02,
        -2.4376e-02,  2.3330e-02,  2.5781e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 8.4795e-01, -1.1226e+02, -4.0845e-01,  6.2711e-01,  8.0291e-01,
         2.6955e+00,  4.6818e-01,  6.0477e-01,  5.3218e-01,  2.9806e-01,
        -4.0935e-01,  2.6621e+00,  1.1460e+00,  3.7685e-01,  1.6804e+00,
        -5.3360e-02,  2.7595e+00,  7.1224e-03,  4.2164e-01,  6.6736e-01,
         2.3810e-01, -2.0313e-01,  3.3405e-02,  3.4507e-01, -1.4617e+00,
         2.8615e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6775e-01, -7.8418e+00,  1.6755e-03, -1.3915e-01, -2.8405e-02,
        -1.6354e-02,  1.7094e-01,  1.7905e-02, -1.0237e-01, -1.3864e-02,
        -2.4130e-01,  7.5337e-02,  5.6249e-02, -3.7289e-02, -1.0544e-02,
        -1.5835e-03,  1.5476e-01,  5.6519e-02, -6.0561e-02, -2.1106e-01,
        -6.3548e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4308e-01,  1.3415e+01, -2.9322e-01,  8.3344e-02,  9.1210e-02,
        -1.0205e-01, -6.5410e-02,  4.6747e-02, -4.3820e-02,  3.2136e-02,
         3.2750e-02,  6.8256e-03, -1.2874e-03, -1.5648e-02, -9.0155e-02,
        -7.6943e-02,  2.4938e-02, -1.7280e-01,  1.3426e-01, -3.6999e-01,
        -8.7955e-02,  5.7390e-03, -3.0291e-03,  1.1650e-01,  1.4696e-01,
         1.8473e-02, -5.4735e-02, -1.2393e-02, -1.4486e-02,  2.1155e-03,
        -4.3847e-02, -2.2847e-02,  3.6245e-02, -6.6331e-02, -6.0798e-02,
         3.6996e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1691e-01,  2.0778e+01, -2.4086e-01, -1.3429e+00, -8.7439e-02,
         1.3796e-01, -2.5810e-01, -3.2867e-01,  1.7616e-01, -1.3065e-02,
         4.5160e-02, -1.3362e-01, -2.8603e-01, -2.0292e-01, -2.7010e-02,
        -4.0087e-02, -3.2258e-02, -3.2263e-01,  7.1014e-01,  5.2794e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9447e+00, -2.5189e+01, -1.5975e+00, -3.7478e-01, -7.8452e-01,
         1.7018e-01, -5.2009e-01,  2.4892e-01,  8.6253e-02, -1.6177e-02,
        -1.4934e+00, -3.0502e-01,  2.8935e-01, -2.6206e-01, -5.6753e-01,
        -1.2105e-01,  1.9302e-01,  3.5145e-01, -2.3333e-01, -1.0013e+00,
         2.6982e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0448e-01,  3.6443e+01,  5.6930e-01,  1.7488e-01, -4.0219e-01,
        -7.9762e-01, -1.9231e-01, -1.0396e-02,  4.2998e-01,  4.7410e-01,
        -1.1396e-01,  2.1907e-01, -3.1515e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9021e-01, -4.3541e+01,  1.1127e+00,  4.8818e-02, -5.2328e-01,
         4.1947e-01,  9.2681e-01,  3.2873e-02,  1.3677e+00, -1.1268e-02,
         4.9046e-01,  6.3156e-01,  1.8268e-01,  1.0772e-03, -9.2702e-01,
        -2.4649e-01, -8.8196e-02, -5.1526e-01, -1.2987e-01, -1.4253e-01,
         2.0051e-01, -1.1647e-01,  5.2786e-01,  2.1297e-01, -1.1080e-02,
         7.5922e-02,  1.1828e-01, -4.4662e-01, -2.2116e-02, -2.1908e-02,
        -1.4018e-01, -4.6208e-03, -3.2216e-01,  1.0022e-01, -2.1481e-01,
        -1.0673e-02, -6.5820e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8207e+00,  4.9172e+01,  4.9915e-02, -4.9020e-01, -7.0255e-01,
        -2.6463e-01,  2.3195e-01,  3.5642e-01, -4.6090e-01, -6.2593e-01,
        -1.1320e+00,  1.2061e-01, -1.6954e-01,  3.5951e-02, -6.8659e-01,
         1.2128e-01,  2.7457e-01, -2.3941e-01, -2.8307e-01, -3.0571e-01,
         1.6218e-01, -3.2403e-01,  4.1536e-02,  5.5666e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5154, 19.9940,  0.9963,  0.3135, -0.1651,  0.2258, -0.1104,  0.3665,
         0.0939, -0.6168, -0.4695, -0.0842,  0.2114,  0.0608, -0.1416, -0.2566,
        -0.2759,  0.0616,  0.0374,  0.0696,  0.1327, -0.0512, -0.2593, -0.0604,
        -0.0741, -0.0925, -0.0270, -0.1953,  0.0676, -0.0407, -0.5762, -0.2267,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5255e+00, -8.6675e+01,  1.1852e-01,  4.3573e-01,  1.5580e+00,
         1.4198e+00,  4.5519e-01,  1.7897e+00,  3.2666e-01,  1.2174e+00,
         4.7355e-01,  3.5577e-01, -1.3831e-01,  4.7220e-01,  6.0355e-01,
        -7.3845e-01,  1.6372e-01,  7.5300e-02,  4.8758e-01, -3.3235e-01,
         4.4563e-01, -4.4814e-01,  2.6946e-01, -9.7666e-02,  9.8382e-02,
        -6.5296e-01,  1.9957e+00,  7.2318e-02, -7.5512e-01,  2.7132e+00,
        -3.0451e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1468e-01,  2.3404e+00,  5.0169e-02,  1.9848e-02, -2.5256e-02,
        -1.4606e-02,  1.6016e-02, -5.0411e-03,  7.1552e-02, -2.2270e-02,
         4.5616e-02, -2.0726e-02, -4.0102e-02, -1.6082e-02, -1.5722e-03,
        -2.0214e-02, -8.6515e-03, -1.8332e-02, -1.1994e-02, -1.8116e-02,
         2.5249e-02, -6.6402e-03,  3.3907e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1459e-01, -2.8739e+01, -4.7873e-01,  9.0108e-02, -9.4946e-01,
        -4.1782e-01, -1.3545e-01,  2.9576e-01,  1.3766e-01,  2.5861e-01,
         1.9767e-01,  7.6344e-01,  1.2594e-01,  3.2921e-01,  1.8647e-02,
        -1.1121e-03,  5.7217e-02,  1.0166e-01, -1.6652e-02, -1.0811e-01,
         3.3386e-01,  1.7815e-01, -1.7467e-01, -1.3278e-01, -5.7565e-03,
        -7.9993e-02, -6.4584e-01, -4.0537e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.3562e-01, -4.6627e+01, -4.2993e-01,  2.4357e-02, -1.4504e-01,
        -1.0402e+00, -2.0389e-01,  6.1976e-01, -4.7313e-02, -2.0212e-01,
        -1.4898e-01, -2.5496e-02,  3.7844e-02,  2.1236e-01,  1.0358e-01,
        -1.8960e-01, -2.8193e-02, -5.4421e-01,  1.1741e-01,  1.3478e-01,
        -9.0300e-01, -2.5751e-01,  3.9747e-01,  6.8900e-02,  2.4838e-01,
         1.0191e-01, -8.9229e-01, -1.0400e+00,  7.5844e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2887e+00, -1.1108e+02,  8.5991e-01, -1.0591e+00, -3.7623e+00,
         1.5402e+00,  1.0451e+00, -5.2393e-01, -6.0043e-01, -3.9767e-01,
        -8.5022e-01,  9.9863e-01,  1.7704e+00, -7.8316e-02, -1.9215e-01,
         7.4609e-01,  2.8432e-01,  2.0506e-01,  5.4577e-01,  3.4143e-01,
         1.3379e+00,  8.0801e-01,  1.3129e+00, -1.7116e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0620e+00,  9.9455e+01,  2.9212e-01,  1.4759e+00,  1.4352e+00,
         1.6051e+00, -7.6609e-01,  1.7140e+00,  1.5583e+00, -5.7930e-01,
         2.8197e-01,  1.9478e+00,  8.9609e-01, -7.7023e-01, -5.4160e-01,
         3.0974e-01, -2.2470e-01,  5.1153e-01, -7.2702e-01,  6.3538e-02,
         1.7951e-01,  1.8653e-01, -3.6997e-01, -4.2210e-01,  2.2744e-01,
        -5.1416e-01, -5.7967e-01,  4.7713e-03,  3.0623e-01,  2.0381e-01,
        -8.9688e-01, -4.2862e-01,  1.9188e+00, -2.7203e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3002e-01,  6.4475e+01, -3.0621e+00, -1.1306e+00,  6.2404e-01,
         4.1007e-01,  2.2654e-01,  9.3129e-01, -1.8118e-01, -1.3883e+00,
         6.6637e-02,  8.5997e-02,  1.0732e-01, -8.3396e-01,  6.0214e-02,
         5.6619e-01, -7.3154e-01, -1.8373e-01, -8.0897e-02,  1.3273e-01,
        -1.1855e+00, -5.3248e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  4.4657, -64.7562,  -0.9670,   0.3512,  -3.4401,   1.3351,   2.4926,
          0.0897,   0.3702,  -0.1490,  -1.8050,   1.0028,   0.6847,  -1.3335,
          0.6125,  -0.2741,   0.8207,  -0.7023,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.6342, -59.7045,   0.8153,   0.9264,  -0.3916,  -0.4038,  -0.6726,
          0.7447,  -0.4329,  -0.1301,   0.3453,  -0.5331,   0.0749,   0.5309,
         -0.5119,   0.2645,   0.4583,   0.6899,   1.0631,   0.2830,   0.1868,
          1.3596,   0.4472,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6698e+00, -6.0030e+01, -2.3486e+00, -1.6775e+00, -2.0394e-01,
        -4.7607e-02,  7.0301e-01,  5.9974e-01,  9.1248e-01,  3.1460e-01,
         1.1424e-02,  8.8265e-01,  4.9652e-01, -2.9520e-01,  2.1253e-01,
         1.0664e-01,  4.1835e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   5.1281, -107.4757,   -8.1258,   -2.5603,   -3.8881,    0.3372,
          -3.6011,    1.3312,   -0.6542,    3.5043,   -0.8745,    0.3962,
           2.4304,    0.5921,    0.6873,    0.4444,   -1.4991,    0.9334,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1604e+00,  6.6413e+01, -2.9460e+00,  1.0503e+00, -6.1271e-01,
        -5.7580e-01,  4.0644e-01, -1.7177e-01, -4.2407e-01, -1.0899e+00,
        -1.0619e+00, -1.8482e-01, -9.1043e-01, -5.8238e-01, -8.1985e-01,
         9.4173e-01,  3.0218e-01,  1.4335e-01,  5.5508e-01, -1.6925e-01,
        -1.7480e+00, -4.5249e-02,  6.2515e-01, -1.8742e-01, -1.5010e+00,
        -5.3317e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3094e-01,  2.4467e+01,  3.9096e-01, -5.2879e-01, -1.4861e-01,
        -3.2601e-01, -1.7198e-02, -8.1751e-02, -2.5773e-01, -2.9933e-02,
         2.7536e-01, -1.0134e-01, -1.1292e-01,  1.4974e-01, -9.3686e-02,
        -5.3765e-02, -1.0989e-01,  3.1654e-02, -2.3295e-03, -6.8268e-02,
         3.9843e-02, -3.8655e-01,  2.0693e-02,  8.5769e-02,  3.0517e-02,
         1.2254e-01,  1.0710e-02, -6.9123e-02,  1.5585e-02,  2.5843e-01,
         4.7116e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.7528e-01, -6.3155e+01, -2.1320e+00,  7.2878e-01, -8.3044e-02,
        -3.6140e-01, -8.2265e-01, -1.7276e+00, -6.0982e-01, -1.1784e-01,
         6.8255e-01,  3.6476e-01,  6.4922e-01,  1.0083e-02,  8.5131e-01,
        -4.0144e-01,  5.1744e-02, -8.4835e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3304, 73.6799, -2.9289, -2.3013,  2.4308,  1.9368,  0.6061,  0.3715,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.1387e+00,  1.3073e+01,  1.4774e-01, -3.3382e-01, -3.1958e-01,
         1.5121e-01, -2.1341e-01, -6.3034e-02, -2.2844e-01,  4.3588e-01,
        -5.0717e-02, -7.5901e-02, -1.1436e-02,  9.4481e-03,  3.6934e-02,
         5.7692e-02,  3.5442e-02, -6.3618e-02, -1.2463e-01, -1.1391e-01,
        -8.5515e-02,  1.4924e-02, -1.0796e-01,  4.7735e-02, -1.6991e-01,
        -5.3276e-03, -5.6206e-02,  9.1374e-02,  3.7473e-03,  2.3463e-01,
        -4.4941e-02, -1.4187e-02,  1.0776e-02, -2.8170e-02,  7.4290e-02,
        -2.3695e-02, -1.1207e-01, -1.1415e-03, -2.1258e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8476e-01,  2.4747e+01, -2.9645e-01, -1.5549e-01,  3.8889e-01,
        -4.8575e-02, -5.8495e-01, -6.4699e-02,  1.7844e-01, -3.5231e-01,
         1.6651e-01,  1.4088e-01,  4.3589e-01, -1.8475e-01, -2.5153e-01,
        -1.9699e-02,  2.2796e-01, -3.8295e-02,  3.1032e-02,  2.5470e-01,
         5.2400e-01, -1.5121e-01, -1.8558e-01, -6.1065e-02, -1.1173e-01,
         1.7701e-02, -1.2457e-01, -6.9553e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5478e+00, -2.3012e+01,  1.1974e+00, -5.9376e-01,  1.9946e-01,
        -1.6350e-01,  5.9093e-01,  4.8498e-02, -1.6555e-02,  3.1384e-01,
        -1.3668e-01,  4.5008e-01, -6.8135e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.4078, -56.6876,   3.1853,   0.9239,  -0.1819,   0.0878,  -0.1680,
         -0.6536,  -0.5126,  -0.7238,   0.7534,   0.8396,  -0.1902,   1.4484,
          2.0131,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8190e+00,  2.9316e+01,  6.4939e-01,  6.2605e-01,  6.0236e-01,
         3.4031e-01,  3.6196e-01, -6.0801e-01, -6.4356e-01,  2.5902e-01,
        -7.2388e-01,  2.0872e-01,  4.8330e-01, -5.2073e-02,  1.8581e-01,
         5.4109e-02,  4.1334e-02, -2.7736e-01,  2.0897e-01, -4.1074e-02,
        -1.1091e-01, -6.2887e-02, -2.2652e-01, -8.7691e-02,  2.9572e-01,
         1.3134e-01, -6.7933e-02,  1.9071e-01, -2.5687e-01, -2.1928e-02,
        -5.9424e-02,  8.4808e-02,  4.7780e-02, -1.3062e-01,  3.4147e-01,
         1.6134e-01, -8.7899e-02, -3.9276e-02, -1.2127e-01, -8.0942e-02,
        -3.4438e-01, -1.4818e-01,  1.1910e-02,  3.7972e-03,  2.0013e-01,
         1.8810e-01, -3.3085e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6212e-01,  4.6839e+01,  2.0498e+00, -2.9823e-01, -1.2127e-01,
        -4.8898e-01,  1.0801e-01,  1.4516e-01, -3.6200e-01,  7.0599e-02,
        -1.5375e-01, -6.2229e-01, -1.6205e-01,  2.6162e-02,  4.6809e-02,
        -1.4472e-02, -1.5542e-01,  6.9878e-02,  1.9530e-01,  6.0827e-02,
        -1.4453e-02, -3.3389e-01, -1.0967e-02, -1.9336e-01,  1.3001e-01,
         2.0855e-01,  4.3795e-01, -1.7818e-01, -2.1227e-01,  1.7289e-01,
         1.4479e-01,  3.8223e-02,  6.0929e-02,  2.3020e-01,  2.6051e-01,
         4.6228e-02, -2.9219e-01,  2.9903e-02,  1.1547e-01, -1.8050e-01,
         2.0247e-01,  6.0333e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9578e-01,  8.8803e+00,  3.2272e-01, -1.5549e-01, -5.7316e-03,
         2.9525e-02,  7.4248e-02,  1.6040e-02, -4.6098e-02, -7.6826e-02,
         1.0568e-01,  3.4429e-02,  9.5014e-03,  3.7835e-02,  2.0512e-01,
        -1.0168e-02,  1.3283e-01,  5.0451e-02,  3.9248e-02,  1.4211e-02,
        -3.6378e-02, -2.0856e-02,  4.3529e-02,  1.4945e-01, -1.3486e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3664e-01,  1.3922e+01, -3.3185e-02, -1.2239e-01, -5.6308e-02,
        -1.6618e-01, -3.1263e-01, -1.1524e-01, -1.5411e-01, -1.1866e-01,
         2.5656e-02,  1.6692e-01,  8.3359e-03,  2.0043e-01, -1.5717e-01,
        -6.8791e-02,  7.2656e-02, -7.1913e-02, -3.2594e-02,  2.4362e-01,
         2.2248e-01,  1.5710e-01,  9.6674e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0914e+00,  2.8556e+01,  4.9709e-01,  5.2735e-01, -1.0105e-01,
         7.6539e-02,  3.0359e-01, -1.4576e-02,  3.4222e-01,  1.3116e+00,
         7.0464e-01, -3.5166e-01, -1.7745e-01, -2.0210e-02, -4.3946e-02,
        -9.4596e-02,  6.6644e-03,  5.8772e-02, -9.7103e-02,  6.1937e-02,
         2.3462e-01,  3.1386e-02, -1.1416e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1682,  1.2252,  0.0372,  0.0092, -0.0155,  0.0213,  0.0203,  0.0057,
        -0.0181,  0.0132,  0.0074,  0.0373, -0.0677,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.2730, -14.2379,   0.6717,   0.4252,   0.0379,  -0.0279,   0.1461,
          0.2040,  -0.1585,  -0.2326,  -0.2078,  -0.7308,  -0.2829,   0.0307,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0242e+00, -1.5958e+01, -1.3042e+00,  3.4979e-02, -5.7731e-02,
         1.1984e-01, -1.0378e-01, -3.1722e-02, -7.5799e-02,  4.0130e-01,
         1.9584e-01,  4.2011e-02, -1.1207e-01, -3.4893e-01, -5.6212e-02,
        -5.5488e-03,  2.1703e-01, -3.0067e-01, -5.0682e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.5705e-01,  7.3292e+00, -5.4041e-02, -2.0627e-01, -8.4312e-02,
         1.2018e-01, -1.5593e-03,  3.8719e-02,  2.4807e-02, -3.2049e-03,
         2.5406e-03, -5.9794e-02,  9.9960e-03,  2.5980e-02, -7.8425e-03,
        -3.4079e-01,  2.7721e-02, -7.2563e-02,  1.3829e-02,  1.5247e-01,
         1.8572e-02,  7.1605e-02, -1.7437e-02,  5.0818e-02,  5.6474e-02,
        -4.9371e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2361, 35.9478, -0.6916, -0.6604, -0.4935,  0.2077, -0.1666, -0.4420,
         0.2820, -0.5968, -0.1101,  0.2365, -0.5952, -0.3822, -0.5369, -0.6405,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1845, 46.1513, -0.1576,  1.1047, -0.9910,  2.9904,  0.8954, -0.3408,
        -0.0521, -0.4668, -0.9843, -0.2950, -0.3442,  0.2780,  0.2619,  0.0906,
         1.6074,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1797, 81.9716,  0.0822, -3.3717,  4.7423,  2.7945,  1.8167,  4.7861,
        -3.7756,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3703e-02,  2.0203e+01, -6.1701e-01, -2.2780e-01, -6.9291e-01,
         6.9431e-02,  1.1083e-01,  1.4786e-01, -3.7546e-02,  6.9606e-02,
         5.0436e-01,  8.4164e-02,  1.0811e-01,  5.0237e-02,  9.5030e-02,
         1.6937e-01,  2.5477e-01, -1.1929e-01,  3.1916e-01, -8.9943e-02,
        -1.0910e-02, -4.2527e-02,  2.7146e-01, -4.6361e-02,  2.2030e-01,
        -3.9156e-01,  1.4052e-01,  3.5142e-02,  2.2876e-02, -2.3483e-01,
        -8.8278e-02,  1.4419e-01, -5.5391e-02, -1.8156e-01, -3.1203e-02,
        -2.1694e-01,  8.2687e-02,  1.4131e-01, -1.0011e-01,  2.7377e-02,
        -2.0774e-01,  4.4450e-01,  5.0319e-02, -2.8875e-02, -2.2047e-01,
        -9.4479e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9717e-03,  8.4103e-01, -1.8007e-02, -1.7674e-02, -7.9097e-03,
        -2.7222e-03, -3.1712e-04, -3.7916e-03,  2.6414e-03,  1.0024e-02,
        -8.2276e-03, -8.4578e-04,  4.5781e-03, -1.8670e-03, -1.4752e-03,
         3.7016e-03,  2.9462e-03,  4.0807e-04,  8.1268e-03,  2.2558e-02,
         3.2913e-03,  4.3488e-03, -7.9329e-03, -4.0096e-03, -3.4531e-03,
        -1.3124e-02, -9.0974e-03, -6.8714e-03, -1.7898e-03,  7.3710e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4810e+00,  1.0392e+02,  4.4697e-01, -7.9940e-03,  2.0094e+00,
         3.6829e-01, -2.8385e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -4.0757, -119.4514,   -3.6194,   -0.3406,    3.8703,    2.6076,
           1.2934,   -1.6380,    4.5371,    4.9823,    7.4987,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1702, 35.5736,  1.5305,  1.9677,  0.4167,  0.8480, -0.8553, -0.0470,
         1.2415,  0.6264, -0.3631,  0.6704, -0.6960, -0.6984,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0597e-02,  6.3683e+01,  2.3810e+00,  5.9482e-01, -1.6189e-01,
         3.3969e-01, -4.6619e-01,  2.7715e-02, -8.7209e-01, -2.9200e-01,
        -3.5277e-01,  5.4149e-01, -1.0512e-01,  2.3212e-01, -6.6796e-02,
         7.6031e-02,  1.5501e-01, -2.3667e-01, -1.7047e-01,  1.6886e-01,
         7.8653e-01,  9.5363e-01, -3.0715e-01,  3.6342e-01, -2.2108e-01,
         4.5684e-01,  3.4431e-01,  2.4290e-01, -1.6416e-01, -2.0541e-01,
         7.2564e-01,  2.3622e-01,  2.4922e-01,  4.3847e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5803e-01,  1.9046e+01,  1.3175e+00,  2.1771e-01,  3.4433e-02,
         2.4764e-01,  1.3115e-01, -3.5085e-01, -1.9232e-01,  6.9529e-02,
         3.0939e-02, -6.9322e-02,  9.3384e-02,  2.0043e-01, -5.1858e-01,
        -1.7427e-02,  1.3325e-02,  1.2531e-01, -3.4194e-02,  1.7624e-02,
         4.3849e-02,  1.1117e-03, -4.5157e-01, -1.2833e-02,  8.5587e-02,
         3.1162e-03,  4.7250e-01,  2.7994e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5942e-02,  7.8101e+00,  1.7621e-01,  3.3716e-02,  5.2196e-02,
        -5.0403e-02, -3.7034e-02,  8.8772e-03, -5.3357e-02,  1.9998e-02,
        -1.9143e-03,  2.0210e-02,  9.2362e-02, -9.1664e-02, -1.0701e-02,
         1.6345e-02,  9.4359e-04, -9.4543e-02, -2.0379e-02,  2.5705e-02,
         4.3717e-02,  1.5853e-02,  2.7168e-02, -1.0582e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 2.3492e-01,  1.2934e+01,  8.1729e-01, -7.4939e-02,  2.3323e-01,
         1.2094e-01, -2.2035e-01, -1.6726e-01,  2.7258e-01, -1.2805e-01,
        -2.3476e-01,  1.0732e-01,  1.2476e-01, -2.1795e-03,  3.3576e-02,
         5.3959e-03, -8.2392e-02, -5.8936e-02, -1.6386e-01,  3.7251e-01,
         3.9497e-02, -7.6265e-02, -1.1795e-01,  7.6062e-02,  1.9792e-01,
        -1.4149e-02,  3.4924e-03,  2.1661e-02,  7.6743e-02,  1.9481e-01,
        -2.3273e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4941e+00,  5.5771e+01, -5.8504e-02,  8.7323e-01,  2.1911e-01,
         3.4703e-01, -6.5615e-01,  4.5162e-02,  1.3049e-01,  7.3782e-01,
        -4.1062e-01,  6.0747e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1619e+01, -1.0737e+02, -8.3269e-01,  2.4229e+00, -2.1159e+00,
         5.4967e-01,  3.5800e-01,  5.4599e-01,  3.4229e-02, -4.4983e-01,
        -1.5662e-01,  7.8721e+00, -3.1251e+00,  5.1456e+00, -2.5873e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9689e+00, -1.0014e+02, -1.2229e+00, -1.0461e-01,  3.4638e-01,
        -5.8931e-01, -1.8808e+00, -7.4999e-01, -1.9249e-01, -1.6907e+00,
         1.7406e+00, -3.9761e-01,  1.0960e+00,  4.1562e-01,  2.4636e-02,
         4.0278e-01,  2.8861e-01,  4.5600e-01, -1.9965e-01, -3.0357e-01,
        -1.7607e-01,  2.0681e-01, -2.7853e-01, -7.3043e-02, -8.7817e-01,
         1.6834e-01, -5.7794e-02,  8.2735e-01,  2.0095e-01, -4.0581e-01,
        -7.1872e-01, -1.6713e-01,  3.4842e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.4198, -30.7536,  -1.4197,   1.3157,   0.2240,   0.0911,   0.0427,
          0.2372,   0.5956,   0.2282,   0.3618,  -0.0354,   0.3959,   0.2268,
         -0.5427,  -0.4927,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.6608, -60.8074,   1.3900,  -1.1535,  -0.3440,  -0.3431,  -1.5199,
          0.8307,  -1.7089,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6749, 20.8787, -2.0861, -0.0976, -1.4984,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5744e+00, -8.5008e+01, -1.6564e+00,  1.1205e+00,  5.7371e-02,
        -2.3496e-01,  5.8080e-01, -1.0382e+00, -2.3502e-01, -1.1932e+00,
        -2.8178e+00, -7.8160e-01, -9.5652e-01,  1.8515e+00, -3.8445e-01,
         8.9371e-02, -1.1716e+00, -4.5849e-01,  2.3067e-01,  1.8391e-01,
         6.8497e-01, -1.0097e-02,  2.9292e-01, -4.8082e-01, -5.6568e-02,
         1.2054e+00, -1.5583e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1742, 33.1795,  0.3390,  0.2958,  0.3863,  3.0550, -1.9437,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4493e+00, -1.3567e+02,  5.2357e-01,  1.1072e+00,  1.8401e+00,
        -6.2076e-01, -2.1441e+00,  1.5224e+00,  4.1912e-01,  1.4927e+00,
        -5.7475e-03, -3.3982e-01,  1.1932e+00,  2.2902e-01, -2.1893e-01,
        -2.6954e-03,  3.8472e-01, -5.4042e-02,  1.3777e+00,  2.7586e-01,
        -4.1805e-01, -7.8739e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6372e+00, -3.8338e+01,  1.0458e-01, -2.7532e-01,  1.0648e+00,
         2.7119e-01,  5.5812e-01, -1.8753e-01,  2.4979e-01, -6.7526e-02,
        -1.4435e-01,  4.4365e-01,  6.4856e-02, -3.5259e-01, -6.1493e-02,
        -8.0247e-02,  1.1679e-02, -2.4333e-02, -3.7718e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9524e+00, -9.4924e+01, -3.3495e+00,  7.5711e-01, -2.0127e-01,
        -9.5508e-02,  1.6427e+00, -6.2060e-01, -5.0886e-01, -4.5922e-01,
        -2.6895e-01, -3.8075e-01, -1.6080e-01, -6.6068e-01, -5.1341e-01,
         1.4571e-01, -1.6813e-01, -3.4785e-01,  2.5622e-01, -7.5533e-01,
        -3.9888e-01,  2.0272e-01,  2.6827e-01,  1.2036e+00, -2.0929e-01,
        -2.9656e-02,  2.2809e-02,  6.1975e-01, -1.8757e+00, -2.3219e+00,
        -3.0929e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.1749e+00, -4.3803e+01, -4.6185e+00,  1.2422e+00,  6.0813e-02,
         8.7996e-01,  1.5583e-02,  1.2315e-01,  3.2202e-01, -1.6948e-01,
        -1.4874e-01,  5.4174e-01, -6.0200e-01, -2.0276e-01, -3.8383e-02,
        -3.9604e-01,  1.5925e-01, -1.0499e+00,  3.0777e-01,  1.6500e-01,
         3.3036e-01,  1.3829e-02,  8.3690e-02,  1.8145e-01, -5.3846e-02,
         1.5805e-02,  4.7263e-01,  1.0498e+00,  1.8164e-02,  3.4310e-01,
         4.2108e-01,  2.6010e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.0436, -13.1622,   0.0178,  -0.2573,   0.1892,  -0.1589,   0.1119,
         -0.1259,  -0.3783,   0.1041,   0.2834,   0.0677,   0.1475,   0.1468,
          0.0219,  -0.2225,   0.0163,   0.0848,   0.0781,   0.1301,   0.1913,
         -0.1362,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7680e-01,  1.7339e+01,  7.4637e-02, -1.9447e-01, -8.1759e-02,
         5.4375e-02, -2.6338e-01, -2.4302e-01, -2.3016e-01,  4.4045e-02,
         1.9045e-01,  7.8587e-02,  2.8181e-01,  1.1964e-01,  1.3550e-02,
         1.0636e-01, -6.7257e-03,  2.9568e-01, -4.5914e-02,  3.5871e-01,
         2.1083e-01, -1.0675e-01,  2.8403e-02, -3.4285e-02, -4.0670e-02,
        -1.0471e-01, -1.6141e-02,  1.5667e-01,  1.9820e-01,  1.3623e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3621e-01,  1.4797e+01,  3.4743e-01, -2.6488e-01,  3.4545e-02,
        -1.8329e-01,  1.3774e-02, -1.1450e-01, -1.0512e-01, -8.1675e-02,
        -1.1058e-01,  1.2233e-02,  2.3189e-03,  2.4680e-01, -3.7283e-02,
        -5.8682e-02, -5.1455e-02, -4.7488e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4405e+00,  4.7125e+01,  5.3112e-01,  6.8337e-02, -1.2142e-01,
        -7.0510e-02, -6.9578e-01,  3.4743e-01,  2.1171e-01,  1.3709e-01,
        -1.7179e-01, -1.1954e-01, -1.1505e-01,  1.2237e-01, -2.8872e-01,
        -6.4703e-01,  5.1196e-01, -1.9596e-01,  3.0140e-03,  2.4396e-01,
         1.3593e-01, -2.7628e-01, -2.6022e-01,  3.0590e-01,  3.2222e-01,
        -6.6223e-03,  1.9247e-01,  4.1462e-02, -2.7150e-02,  1.7318e-01,
         7.0126e-02, -1.0344e-01, -1.4408e-01,  2.4514e-01,  1.5772e-01,
        -3.9109e-01, -8.5937e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1254e+00,  3.1668e+01, -3.0553e+00,  1.2287e-01,  4.5788e-01,
        -1.4955e-01, -1.9431e-01,  3.8641e-01,  1.0758e-01,  8.1286e-02,
         1.0834e-01,  1.3864e-01, -1.2072e-02, -1.3248e-01,  6.4341e-02,
         1.8603e-01,  3.4658e-01, -1.3305e-01,  3.1327e-02, -5.2516e-02,
         6.0847e-01, -8.4371e-02,  1.4330e-01, -1.0854e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6211e-01,  1.1240e+01, -2.0880e-01, -1.7209e-01,  1.0705e-01,
         4.5979e-02, -1.1930e-01, -4.5700e-02,  5.7234e-02, -1.4073e-01,
        -5.4404e-02, -1.4456e-01, -4.4712e-02, -1.4679e-02, -6.6433e-02,
        -2.9048e-02, -4.3539e-02, -1.6892e-02,  1.2659e-02,  8.2167e-03,
         6.7335e-02, -1.6610e-02,  3.1645e-02,  3.6433e-02,  1.3741e-01,
         8.8662e-02, -1.4087e-01, -3.5375e-02,  7.1825e-02, -4.4365e-02,
         1.3918e-01, -3.4714e-02, -1.8154e-02,  6.3771e-02,  9.4177e-02,
        -4.6368e-02, -9.2964e-02, -3.8445e-02, -5.0446e-03, -4.5761e-02,
        -1.4802e-02, -5.7497e-02,  3.0036e-02,  6.4490e-02,  5.4783e-03,
         1.0166e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6073e-01,  4.8218e+00, -8.3611e-02, -6.2060e-02, -5.1268e-02,
        -2.6389e-02,  2.7977e-02, -1.4896e-02,  9.5169e-04, -8.6346e-02,
         1.7031e-02, -5.3896e-02,  3.6221e-02, -5.0591e-02,  1.9568e-02,
         3.5850e-02,  3.5889e-02, -3.2328e-02, -1.0750e-02,  3.4439e-02,
         6.9594e-02,  2.9658e-02,  1.6461e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7051e+00,  6.7948e+01, -3.0177e+00, -5.9511e-01,  4.4538e-01,
         1.0892e+00,  1.9337e-03, -1.3875e+00, -7.2976e-01,  7.8691e-02,
         7.0781e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9176, 28.8741,  0.8287,  0.2444, -0.4412, -0.5530,  0.1970,  0.4005,
         0.6257, -0.1280,  0.4161,  0.2928,  0.5319, -0.1062,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3590, 26.0031,  0.6188, -0.1965, -0.2636,  0.1780,  0.0538, -0.0362,
         0.2570,  0.6551,  0.7707, -0.0306,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0560e+00, -5.0170e+01, -3.7309e+00, -1.1898e+00, -8.3408e-02,
         7.3737e-01,  1.0235e+00, -1.5896e-01, -5.0537e-01, -5.0339e-01,
         5.9377e-02, -2.6806e-01, -9.8611e-02, -1.0037e-01, -5.3127e-02,
        -3.0201e-02,  1.5819e-01,  1.8896e-01,  5.3329e-02,  4.0941e-01,
         2.8499e-02, -9.0927e-02, -3.3173e-01,  6.0942e-01, -2.2304e-01,
         1.3228e+00, -1.9954e+00, -3.3520e-01,  3.4808e-02,  1.3747e-01,
        -4.8975e-02, -1.5676e-03, -2.0539e-01,  5.4090e-02, -4.3828e-02,
        -5.9910e-02, -1.8183e-02, -1.1440e-01,  3.0955e-01,  1.4207e-01,
        -1.4721e-01, -5.8660e-02,  8.0386e-02, -5.7838e-02,  1.8947e-02,
         2.9363e-01, -2.0011e-01,  6.1470e-02, -2.5851e-02, -3.3523e-02,
        -1.4299e-01,  1.7608e-01,  4.4214e-01, -3.8375e-01, -1.0908e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 4.0545e-01, -8.7654e+01,  3.6547e+00,  1.6023e-01, -1.3219e+00,
        -1.4151e+00,  5.4415e-01,  1.4753e+00, -1.4628e+00, -7.4027e-01,
         1.9916e+00, -7.4064e-02, -1.1010e+00, -6.4587e-01, -2.1831e-01,
        -9.4372e-01, -5.0603e-01,  2.9466e-01, -6.6098e-02, -1.2087e+00,
         1.1076e+00,  2.4509e-02,  8.7686e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7485e-01,  1.1812e+01, -9.6720e-02,  2.7635e-01,  8.4302e-02,
         1.9378e-01,  8.1810e-03,  1.1504e-01, -2.1105e-02,  1.2239e-01,
        -2.0208e-01, -1.1219e-01, -7.7183e-02,  1.3893e-02,  1.2663e-02,
        -8.8954e-02, -1.5776e-01,  5.1719e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8875e-01,  1.8222e+01,  1.9229e-01,  4.3831e-01,  1.9516e-01,
         3.8298e-01, -1.7646e-03,  4.1944e-01,  7.9826e-03, -9.7398e-02,
        -1.3366e-01,  2.8798e-01,  5.0561e-01, -1.2006e-01, -4.9539e-02,
         1.0787e-01, -5.6228e-03, -7.4411e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8716e+00,  4.9521e+01, -2.6182e-01,  6.6256e-01,  5.3864e-02,
         1.1291e+00, -6.5156e-01,  2.6031e-01, -7.8018e-01,  2.6672e-02,
        -1.4259e-01,  3.0201e-01, -5.4776e-01,  4.4790e-02, -2.3240e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3697e+00,  5.6668e+01, -3.3521e-01,  4.8357e-01,  7.0126e-01,
        -5.9194e-02, -2.0990e-01,  7.9066e-02,  4.7790e-01,  7.9669e-01,
         9.9712e-01, -6.9536e-02,  2.9580e-01,  4.1513e-01,  2.2408e-01,
        -1.3810e-01, -1.9001e-01, -1.3478e-01, -1.8153e-01,  7.4806e-01,
         7.9592e-01,  8.9137e-02,  4.8090e-02, -1.5264e-01, -9.0877e-01,
         2.3119e-01,  4.0664e-01, -2.1768e-01,  1.4938e-01,  4.0442e-01,
        -5.2548e-01,  1.4193e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1310e-01,  1.1471e+01,  1.2242e-01,  8.3062e-02, -1.4830e-01,
        -7.4283e-03,  1.5348e-02,  4.2865e-02,  2.6430e-02,  2.4776e-02,
        -1.2460e-02,  8.9817e-03, -6.6869e-02, -1.1696e-01, -3.7031e-03,
        -8.3240e-02, -5.6415e-02, -1.0222e-01, -1.6573e-01, -7.3592e-02,
         5.9058e-02,  1.1829e-01, -1.7245e-01,  4.9634e-03, -9.4617e-02,
        -6.7953e-02, -1.0192e-01,  7.3420e-02, -5.4969e-02, -1.6417e-02,
        -9.5730e-02, -6.1203e-02,  2.7964e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9379e+00,  8.2340e+01,  1.9342e+00,  1.9537e+00,  2.4734e+00,
         3.3728e-01,  1.8669e-01, -6.4495e-01,  1.9628e-01,  1.1412e+00,
         9.5467e-02, -7.6135e-01,  1.3659e+00,  2.5360e-01,  2.4561e+00,
        -4.1888e-01, -2.7528e-01,  1.3800e-02, -8.3217e-01, -6.3236e-01,
        -1.3675e-01, -2.9102e-01, -7.4371e-02, -1.5042e+00,  9.2267e-01,
        -2.8285e-01,  2.5580e-01,  3.9098e-01,  6.1256e-01,  4.7935e-01,
         2.8530e-01,  5.3183e-01,  9.7355e-01,  1.1035e-01, -2.0761e-01,
        -4.0204e-02, -2.1950e-01, -2.5403e-01,  9.9153e-03,  1.9795e-01,
        -1.6081e-01, -1.2833e-01, -8.5568e-02,  5.1155e-01,  3.9856e-01,
         1.5714e+00, -2.3944e-01, -2.5475e-01, -1.3109e+00,  1.2924e-01,
        -5.6460e-01,  9.3960e-01,  1.4891e-01,  2.6569e-01,  4.8148e-01,
         5.1314e-01, -2.4241e+00, -9.2232e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4759e-01,  2.7814e+00, -1.0771e-01,  7.3841e-02,  7.6980e-03,
         3.6970e-03,  5.4076e-03, -1.1005e-02, -1.7154e-02,  1.5692e-02,
         4.6615e-05,  1.3141e-02,  3.0797e-02, -3.6946e-02,  1.5943e-02,
         1.5706e-02,  3.6548e-02, -7.4152e-03, -1.2847e-02,  9.9702e-05,
        -1.1958e-02, -7.9672e-03, -3.3579e-02, -2.1576e-02, -2.0320e-02,
        -1.9043e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5524, 29.0575, -3.3132,  0.1673,  0.1613,  0.6956, -0.5675, -0.1958,
        -0.2258,  0.2368,  0.5647, -0.2577,  0.3535,  0.4348, -0.3026,  0.0874,
         0.1256,  0.1446, -0.2136, -0.0969, -0.1394, -0.6430, -0.9321, -0.1878,
        -0.9856,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4461e+00, -1.0464e+02, -6.6604e+00, -1.9180e+00, -3.6601e-01,
         9.7352e-01, -7.7607e-01,  9.6633e-01,  1.2095e+00,  1.3701e+00,
         1.0596e+00,  2.1099e-01, -7.3067e-01, -1.0013e+00,  9.6921e-03,
         8.7028e-02, -3.7101e-01, -8.3896e-01,  1.2910e-01,  5.7952e-01,
        -1.2315e+00,  3.8285e-01,  5.2249e-01, -2.0249e-01, -1.5599e-01,
        -2.1393e-01, -5.7945e-01, -1.1919e-01, -6.7576e-02, -5.6466e-01,
        -6.2992e-02,  6.5674e-01,  4.0751e-01, -2.2222e-01, -1.1577e+00,
         3.4007e-01,  3.2210e-01,  9.4523e-02,  3.1538e-01,  1.1397e-01,
         5.0317e-01,  5.7532e-01,  6.1931e-03, -7.7361e-01,  5.3192e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3145e+00,  2.1936e+01, -8.9136e-01,  1.9719e-01,  1.1613e-01,
        -2.5378e-01, -2.6048e-01, -3.1879e-01, -1.1108e-01, -6.0487e-01,
        -2.0330e-01,  3.4637e-01,  1.2034e-01,  1.7535e-01, -3.3896e-01,
         2.6924e-01, -2.6050e-01, -2.0274e-03,  2.8783e-02,  1.1247e-01,
         3.3164e-01,  1.5053e+00,  4.8117e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0670e-01,  2.4493e+00, -1.7459e-01, -6.3676e-02, -2.4629e-02,
         1.7575e-02, -1.8398e-02, -1.6311e-02, -4.9258e-04, -2.1730e-02,
        -8.9355e-02,  4.0808e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([  1.8760, -78.3438,  -0.7407,   1.4784,  -0.7152,  -1.4594,   0.4945,
         -0.4311,  -1.0718,   1.3061,   1.9664,   0.6494,   1.0713,  -1.0814,
         -0.6152,   0.2692,  -0.6249,  -0.3298,  -0.3953,   0.4345,   0.3555,
          0.4171,  -0.8918,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5492e-01,  1.5290e+01,  3.4711e-01, -2.6311e-01, -1.5892e-01,
        -2.7870e-01, -1.8455e-01,  5.7421e-02, -1.1082e-01,  2.4887e-01,
         6.2738e-02, -1.8406e-01,  2.7653e-02,  1.7752e-02, -2.1272e-01,
        -8.6859e-02, -7.1154e-02, -1.5362e-01, -1.0922e-02,  7.3809e-02,
        -1.2163e-01,  3.1783e-02, -1.5254e-01, -2.5320e-02, -1.0027e-01,
         2.2859e-02, -1.1359e-01, -1.1732e-01, -7.5811e-02,  4.3172e-02,
        -1.2967e-02, -3.2024e-02,  1.0159e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7818e+00,  4.5474e+01,  4.4543e-01,  1.6490e-01,  2.0213e-01,
        -2.0199e-01, -9.1307e-01, -7.7829e-01,  2.5729e-01,  5.1097e-01,
        -1.2121e-01,  2.8814e-01,  4.1882e-01,  1.1157e+00,  4.6108e-01,
        -3.4431e-02, -2.4898e-01,  1.2052e+00, -5.8541e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -6.0856, -41.3307,   1.0068,  -1.1054,   1.5981,  -0.8413,  -0.7415,
          0.4467,   0.3966,  -0.5962,   0.3346,   0.8238,   0.2995,   0.2083,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7115, 76.7091,  3.2448, -1.8164, -1.2850,  1.0376,  0.9893, -0.8231,
         0.7807,  0.4905,  0.2066, -0.1128,  0.2652,  0.8591, -0.3516,  0.5584,
        -0.3944, -0.4250,  0.3079,  0.4654, -0.1459,  1.6466,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5209e-01, -5.7113e+01,  6.2246e-01, -7.0642e-01, -3.1727e+00,
        -3.9732e-01,  4.2642e-01,  4.7431e-01, -2.4183e-01, -8.6842e-01,
         2.2382e-01, -6.3634e-01,  1.0544e+00,  2.3106e-01, -4.1833e-01,
         1.0264e-02, -2.8927e-01, -6.3013e-02,  4.9438e-01,  8.6231e-02,
         9.8936e-02, -9.6032e-02, -9.6826e-02,  6.2696e-01,  4.3739e-01,
        -4.4068e-01, -1.0427e-02, -2.6114e-01, -6.1123e-01, -1.7488e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1263, 11.8606, -0.5662,  0.0723,  0.1156, -0.1655,  0.5521, -0.2080,
        -0.1967,  0.0605, -0.1280, -0.0495, -0.0354,  0.1910,  0.1383, -0.1201,
         0.5181,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4927,  4.4647,  0.2430,  0.1430, -0.0542,  0.1385, -0.0679,  0.1485,
         0.1940,  0.0080, -0.0169,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9477e+00,  1.1771e+02,  1.0902e+01, -5.4944e-01, -7.0809e-01,
         3.0265e+00, -6.8413e-01, -1.7889e+00, -9.5777e-02, -2.0031e-01,
         6.7733e-01, -6.7682e-01, -1.2010e+00,  7.3606e+00,  4.0678e+00,
        -2.4644e+00, -3.1698e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3950e-01,  3.8595e+01, -1.0833e+00,  8.1529e-01,  7.5465e-01,
        -2.7989e-01, -1.4894e-01,  1.8778e-01, -1.0464e-02, -2.1819e-01,
        -4.3111e-01,  5.9869e-01, -2.9842e-01,  5.2374e-02, -1.6821e-01,
         1.1074e-01, -1.3043e-01,  6.2953e-03, -5.6591e-01,  6.1954e-03,
        -5.9862e-02, -3.6134e-01,  9.0453e-01, -4.5425e-03,  2.5121e-01,
        -1.5789e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0513e-01,  1.3438e+01,  6.5091e-01, -2.2266e-01, -5.3581e-02,
         5.9588e-01, -2.6928e-01, -7.2915e-03,  3.2026e-02, -6.5607e-02,
        -9.9680e-02, -8.1602e-02, -7.8151e-02, -7.6451e-02, -5.4410e-03,
         2.0038e-02, -4.9413e-02,  1.4839e-02,  2.4124e-03,  1.3433e-02,
        -1.3194e-01,  5.7063e-02, -6.7515e-02, -5.5840e-02, -6.5769e-02,
         1.0845e-02, -6.3068e-03,  8.8587e-02,  6.8773e-03, -2.4386e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0516e-01,  3.7445e+01, -4.7345e-01, -9.7338e-01,  6.6511e-01,
        -3.2004e-01,  3.2051e-01, -2.6876e-03,  6.5507e-01,  2.5216e-01,
        -2.5567e-01, -6.5047e-02, -1.5574e-01,  1.1736e-01, -3.4713e-01,
         1.6649e-01, -5.5719e-01, -3.9764e-02,  2.1356e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([  6.0394, -65.7533,   1.5379,  -2.6554,  -0.1844,   0.5127,  -1.8322,
          0.4168,   0.0848,   0.0785,   0.1239,   0.3698,  -0.1596,  -0.9093,
          0.8436,  -0.9148,  -1.4401,  -1.1089,  -0.7939,  -0.3079,   0.2260,
          0.7668,   0.9724,  -0.1687,   0.5598,  -5.5042,   1.1571,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5520e+00, -3.2020e+01,  4.0534e-01,  2.4850e-01, -2.3225e-01,
        -3.5141e-01, -3.7391e-01, -6.2018e-02, -1.8182e-02, -1.4979e-01,
         4.7980e-03, -1.3182e+00, -4.8240e-01, -4.3761e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3268e+00,  7.4271e+01, -5.0366e-01,  3.0013e-01, -1.6756e-01,
         8.5642e-01, -1.2306e-01,  7.0415e-01,  1.3187e+00,  3.6141e-01,
         1.0725e+00,  2.7850e-02, -1.0450e-01,  5.0056e-01, -6.3296e-02,
        -3.6821e-01, -3.8289e-01, -3.5057e-01, -8.4871e-01,  4.3725e-01,
        -5.2675e-01, -7.9419e-02,  3.0357e-02, -6.9188e-02, -1.8691e+00,
        -2.9475e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5863e-01,  5.6000e+01,  1.5949e+00,  2.0980e+00,  8.5995e-01,
        -7.6824e-01,  6.7404e-01,  3.1597e-02, -4.2050e-01,  8.2634e-01,
         4.9618e-01,  2.5011e-01,  2.4216e+00, -6.7297e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1677e-01,  1.7737e+01,  3.2285e-01, -2.3936e-01, -2.5426e-01,
        -2.4154e-02,  5.8362e-02, -2.2394e-01,  2.3010e-02,  7.5839e-02,
         4.9993e-02, -1.1518e-01, -3.4710e-02,  7.0260e-02, -4.0475e-02,
         7.7763e-02,  4.8831e-02, -7.3561e-04,  6.0929e-02, -8.5081e-03,
        -2.6628e-02,  5.0298e-03, -2.3778e-01, -1.2536e-01, -1.1886e-01,
        -6.6882e-02, -1.0838e-01,  1.2137e-02,  1.4823e-01,  1.3947e-01,
        -1.2375e-01,  5.2666e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1672e+00, -6.7016e+01, -2.0794e+00,  7.0067e-01,  1.6618e+00,
        -9.7050e-01,  5.6247e-01,  1.0916e+00, -7.5139e-03,  4.4395e-02,
         6.4935e-01, -1.4679e-01,  1.7405e-01,  2.8314e-02, -9.6889e-03,
        -1.0667e-02,  8.8300e-02,  3.1520e-01,  4.1518e-01,  1.3218e-01,
         5.3008e-01, -1.8936e-01, -1.1787e-01,  9.6752e-01,  1.7164e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6198e-01,  6.0656e+00, -2.5769e-03, -6.8715e-01,  1.9657e-01,
         1.6199e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5475e-01,  6.4118e+00,  1.3877e-01, -1.4361e-01,  4.5631e-02,
        -6.1348e-02, -1.5779e-02, -8.6493e-03,  2.6444e-02, -4.4142e-02,
        -1.1774e-02,  6.9954e-02,  4.7577e-03,  5.7504e-02, -7.6572e-02,
        -1.7998e-02,  3.0958e-03, -3.1147e-03, -2.1719e-02,  5.5348e-03,
        -3.5509e-03,  1.4160e-02, -4.6967e-02,  2.9562e-03,  1.6629e-02,
         5.2013e-02,  2.0383e-03, -2.4669e-03, -5.9763e-03,  3.3148e-02,
         4.6376e-03, -5.9723e-03,  4.8368e-02, -4.9876e-03,  2.9620e-02,
         5.2491e-02,  1.1071e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8857e-01, -3.8641e+00, -9.2099e-02, -1.9589e-01, -2.5475e-02,
        -5.8088e-02, -2.9769e-02,  7.9266e-03,  1.1923e-02, -1.6029e-02,
        -4.4379e-03, -3.2347e-02, -7.6821e-02, -3.0756e-02, -8.8993e-02,
         1.3181e-01,  1.0069e-02, -3.8409e-02, -2.0220e-02, -2.7044e-02,
         4.3204e-02,  2.1361e-02,  2.6148e-02, -4.1405e-03, -1.0002e-02,
         6.3662e-03,  2.8524e-02, -1.7565e-02,  2.0080e-02,  1.8623e-03,
        -2.9955e-03, -3.7521e-03,  2.6395e-03, -2.4028e-04, -1.5264e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -5.0671, -70.6482,  13.4628,   1.0610,  -1.0096,  -0.8760,   0.3684,
          1.3151,   0.7398,  -0.5931,   1.2159,   1.5813,   0.4926,   0.6782,
          1.8304,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7354e-01,  8.3943e+01,  1.1518e+00,  1.1016e+00,  4.3273e-02,
         9.4637e-02, -3.1789e-01,  9.9924e-01,  1.1651e+00, -5.1565e-02,
        -6.3559e-01, -8.1666e-01, -1.4705e-01, -5.8362e-02, -7.5111e-01,
         2.9546e-01, -3.3264e-01, -4.9588e-01,  3.5773e-01, -1.3041e-01,
         1.1626e-03, -4.3194e-01,  3.4009e-01, -2.7092e-01,  2.2868e-01,
         3.7554e-02, -2.8346e-01, -5.5048e-02, -3.1471e-01,  2.1582e-01,
         1.6532e-01,  1.7887e-01,  1.3126e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5084e-01, -1.8950e+01, -2.7833e-02, -5.4546e-01,  1.4455e-01,
        -1.7129e-01,  1.2414e-01, -1.3420e-01,  2.1832e-01,  1.3585e-01,
        -6.5788e-02, -2.0068e-01, -1.2317e-01,  2.8661e-01,  1.7658e-01,
         8.2051e-02,  1.0596e-01, -6.9429e-02, -5.7871e-03, -5.1579e-02,
         1.6569e-01,  9.8838e-02,  1.3530e-02,  1.2139e-02, -3.2704e-02,
        -9.0722e-04,  3.4121e-01,  2.1999e-01,  4.2442e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 3.1936e-01,  3.8059e+00, -7.7150e-02, -4.2037e-02,  5.2453e-02,
         1.0701e-02, -1.0930e-02, -4.7480e-02,  3.2450e-03, -9.5114e-02,
        -2.9843e-02, -5.4592e-02, -1.1974e-02, -9.9440e-02, -6.1296e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0641e-01,  1.4925e+01,  1.9819e-01,  5.7234e-02, -2.3694e-02,
         1.1607e-01,  8.8817e-03, -3.6236e-02,  7.5480e-02, -6.9804e-02,
        -1.2522e-01, -4.4986e-02, -1.2212e-02,  1.3697e-03,  1.5543e-02,
         1.5268e-03, -3.2695e-02,  3.1074e-02, -3.2361e-03, -3.8762e-02,
        -3.9146e-02, -2.5207e-02, -8.7022e-02,  5.1110e-02,  9.4591e-02,
         5.7062e-02, -1.2176e-01,  4.0517e-02, -7.9158e-03, -3.5743e-02,
         1.8569e-02, -3.4855e-02, -1.6523e-02, -4.3913e-02, -9.2312e-02,
         4.4652e-02, -5.5728e-02, -1.3246e-02,  4.3831e-02, -9.8666e-03,
        -1.1872e-01, -4.1876e-03,  1.1546e-02, -5.0303e-02, -1.0620e-02,
        -2.1675e-01,  8.9998e-03,  7.1852e-02, -1.2828e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4278e-01,  2.7181e+00, -9.6108e-02,  5.5695e-03, -3.5791e-02,
         2.2236e-02, -5.0659e-05,  3.2999e-03, -6.8594e-02,  6.5065e-03,
        -1.2943e-02, -6.5626e-03, -1.4275e-02,  9.2422e-03, -2.3848e-02,
        -7.7746e-03,  8.2315e-02, -3.5168e-03,  6.6450e-03, -1.9671e-02,
        -9.0703e-03, -8.6948e-03, -3.3784e-02,  1.6157e-04, -1.2775e-02,
         2.4157e-03, -9.5306e-03,  9.2110e-03,  1.5798e-02, -3.6548e-04,
         1.3140e-02, -2.3364e-02, -1.3028e-02,  1.5705e-02,  8.3834e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9475e-01,  3.6405e+01, -5.1731e-01,  1.5298e+00, -1.8914e-01,
         4.2580e-01,  4.7312e-01,  1.0925e-01,  9.4898e-02, -1.4581e-01,
         7.2376e-02, -1.7118e-02, -1.9222e-01, -5.9605e-02, -8.6529e-02,
        -2.5662e-01, -1.6385e-02, -1.8525e-01,  4.0797e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   2.1617, -104.2759,    2.5803,   -4.7501,   -1.2774,    0.3521,
           0.3972,   -0.7689,   -3.0657,    0.8094,   -0.2802,    0.8098,
          -0.8275,    0.5009,    3.6039,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4993e-03,  1.9028e+00, -1.5273e-02, -1.0500e-02,  4.2180e-02,
         3.4390e-02, -8.2269e-03,  3.6754e-03,  1.0057e-02,  1.8802e-02,
        -1.6779e-03, -7.8170e-03, -3.2434e-02,  8.5722e-03, -2.1355e-02,
         5.0476e-03, -1.2248e-02,  8.7814e-04, -2.3897e-02,  4.3199e-06,
         3.4376e-04,  3.8105e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4839e-01,  5.1030e+00,  1.5987e-01, -6.2757e-02, -6.8783e-02,
        -1.2052e-02, -2.6203e-02,  3.3171e-02,  1.7150e-02, -6.0126e-02,
        -6.3752e-02,  3.0140e-03,  1.3868e-02,  3.6423e-02, -3.8144e-02,
        -1.4297e-03,  3.7741e-02,  6.5119e-02, -3.3189e-02,  6.4361e-03,
         1.3777e-03,  2.1118e-02, -2.6850e-03,  4.0203e-02, -8.2564e-03,
         1.5937e-02, -5.3817e-03,  2.2787e-02,  3.6312e-03, -3.0300e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2547,  1.5968, -0.0508, -0.0482, -0.0845, -0.0312,  0.0098, -0.0157,
        -0.0322,  0.0447, -0.0537,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0410e+00,  2.9891e+01, -1.7779e-01,  2.8997e-01,  6.4377e-01,
         1.2370e-01,  8.4452e-02, -9.6105e-02,  1.3722e-01,  3.4126e-01,
        -6.8177e-02, -4.8281e-03,  1.2813e-01,  2.9021e-03, -1.3748e-01,
         3.1856e-02, -6.4237e-02, -1.4393e-01,  1.1592e-01, -1.0179e-01,
         4.4247e-01, -3.0184e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0536e-02,  5.8472e+00,  1.3798e-01, -4.4006e-03,  1.2395e-02,
         7.3244e-02, -5.2268e-02,  5.2732e-02, -2.4104e-03, -5.6789e-02,
        -1.2135e-01, -3.6788e-02,  2.6457e-03,  2.9428e-02, -5.8543e-02,
         8.2065e-03, -8.0970e-04, -4.5772e-03,  5.1214e-02, -5.9060e-03,
        -4.0357e-03, -2.0649e-02, -7.3127e-02, -1.1430e-02, -5.9688e-03,
        -1.1573e-02, -4.2028e-03, -3.0722e-02,  1.1177e-02, -8.3260e-04,
        -8.2134e-02, -8.8621e-02,  7.7179e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3635e-01,  2.1464e+01,  1.0461e+00, -6.0377e-01, -9.0571e-02,
         1.1236e-01, -3.5948e-02,  1.1016e-01,  3.5931e-02, -1.0630e-01,
        -9.4983e-02,  3.1496e-02, -2.0456e-01, -1.9579e-01,  3.0041e-03,
        -1.7423e-01,  2.9462e-02, -2.4470e-01, -1.8179e-01,  4.9076e-02,
         3.4308e-01, -1.6254e-01,  3.4495e-02, -4.1830e-02,  2.3553e-01,
         1.5787e-01, -3.9291e-01,  1.1607e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4978e-02,  3.9290e+00,  8.1749e-02,  2.1322e-02,  3.8406e-02,
        -8.3207e-03,  1.8554e-02,  4.9323e-02, -7.3600e-02, -2.5148e-02,
        -8.8961e-02,  6.5541e-04,  1.8820e-02, -1.8489e-02, -1.1637e-02,
        -3.4566e-02,  5.5584e-04, -3.4368e-02, -2.0318e-02, -1.5220e-02,
        -6.6237e-02,  3.2874e-02, -7.7005e-02, -6.7506e-02, -6.3064e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.0042e-01,  4.5931e+01,  2.1807e-01,  1.4290e+00,  1.8728e-01,
        -4.2056e-01, -4.7541e-02, -1.8323e-02,  1.5446e-01, -1.5211e-01,
        -1.2176e-01, -2.9889e-01,  1.5206e-02,  1.2311e-01, -4.0205e-02,
        -2.2805e+00, -3.1102e-01,  8.6782e-02, -3.4623e-02, -1.2107e-05,
        -1.6444e-01, -2.4800e-01, -3.4723e-02, -9.1108e-02,  2.4235e-01,
        -7.6285e-02,  3.3026e-01, -2.2832e-01,  7.3728e-03,  1.8316e-01,
        -4.4510e-01, -2.5235e-01, -3.7375e-01,  3.0041e-02, -2.6475e-04,
         5.6751e-02, -1.1800e-01, -1.6821e-01,  2.8023e-01, -5.0264e-01,
         1.2878e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1669e-02,  4.1016e+01,  8.6437e-01, -1.2056e+00,  5.8476e-01,
        -1.2155e-01, -3.0087e-01,  6.5074e-01,  4.4365e-02,  2.7078e-01,
        -2.1478e-01,  3.8946e-01,  2.3419e-01, -2.2594e-02, -3.1438e-02,
        -7.8141e-04, -9.6178e-02,  2.7462e-01,  1.8874e-02, -2.6591e-01,
        -7.2103e-02, -1.7582e-01,  5.8520e-02, -1.0983e+00,  9.4021e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8995e-02,  1.1013e+01,  6.8269e-01,  3.6900e-02,  1.8542e-02,
         1.6888e-01,  2.9399e-01,  1.1239e-01,  2.5550e-02,  1.3260e-01,
         9.8278e-02,  3.7517e-02,  1.1706e-02,  1.3587e-01,  7.8532e-02,
        -1.8924e-02,  2.1092e-02,  1.2892e-02, -3.3983e-02, -5.2591e-03,
        -2.6734e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6502, 15.4064,  0.6253, -0.3231, -0.2675,  0.0992, -0.1183,  0.0384,
        -0.0967, -0.0916, -0.7328, -0.5479, -0.1516, -0.1578, -0.0894, -0.1185,
        -0.4526,  0.2445,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2172e-02,  6.1679e+00,  2.2135e-01,  2.5800e-01,  2.8536e-02,
        -2.0834e-02,  7.0347e-03, -1.1194e-01, -4.5099e-02, -2.2302e-04,
         4.8145e-02, -2.5402e-02, -3.7659e-02,  4.3976e-02, -7.2616e-03,
         1.3020e-02, -5.4848e-02,  4.2859e-02, -1.7511e-02, -9.4085e-04,
        -7.6591e-03, -2.5067e-02,  7.5539e-03, -7.2116e-02,  6.0097e-02,
        -5.7886e-03, -7.0762e-02,  1.7291e-02, -2.7751e-02, -1.9626e-02,
         1.0309e-02, -3.9076e-03,  2.7241e-02,  6.9327e-03,  6.9661e-03,
         3.6389e-02, -3.4440e-02,  1.6423e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2538, 68.7426, -1.6315,  1.6466, -0.6231, -0.0967, -0.4123,  1.4443,
         1.0071, -0.0785, -0.9921, -0.4195, -0.3197,  0.9068,  0.2130,  0.1447,
         0.1029, -0.4189, -0.4631, -1.5123,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5470e-03,  1.1969e+01, -1.1797e-02, -1.0826e-01,  1.2848e-01,
         2.9145e-01,  5.6024e-02,  2.6624e-01,  5.9174e-02, -1.1566e-01,
         9.1057e-02, -6.0850e-02, -7.3549e-02, -3.9238e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7048e-02,  1.5231e+00, -5.5271e-02,  2.2876e-02, -3.6984e-03,
        -6.8234e-03, -1.3098e-02, -5.0518e-03, -1.4716e-02,  8.8125e-03,
        -7.3837e-03,  3.2963e-03, -6.0867e-03, -1.9847e-03,  9.2107e-04,
        -1.4048e-02,  7.6907e-04,  9.7004e-03,  6.2947e-03, -4.4495e-03,
        -3.0201e-04,  1.8837e-02, -6.1610e-03,  2.7521e-03,  1.9746e-04,
        -2.7562e-02,  1.3126e-02,  1.0314e-02, -2.1731e-03, -3.6261e-02,
         2.7742e-03, -2.0459e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4815e+00,  3.0771e+01,  2.3798e-01, -2.7218e-01,  8.2612e-01,
        -4.5200e-01,  3.6772e-01, -1.4375e-01,  1.3741e-01, -9.7469e-02,
         2.0128e-02, -2.5328e-01, -5.2190e-02,  3.2560e-01,  1.0734e-02,
         1.1493e-02, -3.6371e-02,  3.5434e-01, -2.0862e-01, -1.7069e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2003e-02,  1.1989e+00, -4.0700e-02, -2.1148e-02,  3.0047e-03,
         1.3808e-02,  3.8577e-03, -2.6463e-04, -9.8489e-03, -2.4658e-03,
         2.9464e-03, -2.9220e-03, -8.5588e-03,  1.7221e-03, -1.5482e-03,
        -1.1899e-02, -4.4928e-03, -1.0596e-02,  2.1635e-03,  4.1989e-03,
         1.9772e-03,  2.8948e-03, -1.3009e-02,  1.8884e-02, -4.7908e-03,
        -1.2772e-02, -1.6586e-02, -2.6678e-02,  6.6873e-03, -9.6808e-03,
        -4.3363e-05,  9.4384e-03,  1.6342e-02,  1.4475e-02,  3.1285e-03,
         1.5486e-02,  1.9809e-03, -2.8537e-03, -4.4676e-03, -6.3258e-05,
        -1.2493e-02,  1.2021e-02, -4.1010e-03, -3.1681e-04, -4.8732e-03,
         5.0382e-02, -4.1675e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8709,  6.2801, -0.1509, -0.1117,  0.1451, -0.0142,  0.0140, -0.0639,
         0.0340, -0.0565, -0.0560,  0.0343,  0.0707,  0.0963, -0.3485,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1199e-03,  4.0150e+00,  2.1397e-02, -1.1919e-02,  8.5022e-03,
        -6.6380e-03,  3.2360e-02,  7.7640e-02, -1.3551e-03, -1.7011e-02,
         4.6046e-03, -4.9180e-03, -8.3468e-04,  6.2890e-03, -3.4316e-02,
        -1.6965e-02, -1.8276e-02,  2.1243e-02,  9.6496e-03, -4.7933e-02,
         2.8498e-02, -1.4768e-02, -2.6465e-02,  4.7732e-02,  1.6359e-02,
        -4.7677e-02,  1.5432e-02, -6.3969e-02,  3.6180e-03, -4.3430e-03,
         1.4933e-02, -4.8253e-02, -1.5116e-02, -2.1908e-02,  2.2181e-02,
         1.9804e-02,  1.1541e-02, -7.6467e-03, -2.8442e-02, -5.6769e-03,
         1.6001e-02, -3.2264e-03, -7.8957e-03, -4.5982e-03, -7.6984e-03,
        -6.4473e-03,  2.3982e-02,  7.4674e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-9.0485e-01,  5.3661e+01,  4.9119e-01, -5.0460e-01,  9.2723e-01,
         1.1154e+00, -3.3843e-01, -2.7221e-01,  4.1207e-03,  3.7900e-01,
        -1.1265e-01,  1.5572e-02, -6.8974e-01, -6.5445e-02,  1.2306e-01,
         1.2700e-01,  2.6118e-02,  6.9617e-02, -1.2910e-01, -2.7130e-01,
        -3.0019e-01,  3.4458e-01,  5.4719e-01, -3.4614e-01,  8.1542e-01,
        -1.4180e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4828e+00,  5.3763e+01,  2.4805e-01,  1.1938e+00,  3.0312e-01,
         1.4342e+00, -5.6824e-01,  5.9506e-01, -5.4056e-02, -3.5144e-01,
         1.5812e+00, -4.7853e-01,  2.6926e-02, -1.6252e-01, -6.4429e-01,
        -1.5655e-01, -3.2837e-01, -4.3778e-01,  1.0568e+00,  5.2842e-02,
         2.9221e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5143e-02,  1.7128e+01,  3.2034e-01, -2.5699e-02, -7.6093e-01,
        -4.4339e-01,  1.7639e-01,  1.4449e-01, -1.0553e-01, -1.0943e-03,
         4.2475e-02,  7.1119e-02, -8.7311e-02, -9.5309e-02, -1.7607e-01,
        -7.7183e-02, -2.6318e-02, -1.8924e-01,  1.9703e-01, -2.0518e-01,
        -2.2804e-01, -1.3339e-02,  1.0599e-01, -1.1609e-01,  2.4989e-01,
         2.4445e-02, -1.1152e-01,  1.2683e-02, -7.0988e-03,  2.1023e-01,
        -1.1898e-01,  1.6072e-02,  1.7596e-01,  4.8177e-03, -1.1703e-01,
        -5.3518e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0034e-01,  4.3194e+00, -4.3991e-01,  3.6731e-02,  5.9403e-03,
         4.0360e-03, -7.7263e-02, -3.8538e-02, -7.8840e-02, -4.3235e-02,
        -3.3700e-02, -2.1186e-02, -7.3818e-02, -1.2238e-01, -3.4805e-02,
        -3.3296e-02, -3.5483e-02, -2.0248e-02, -7.0639e-02, -2.1291e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2189e-01, -7.3016e+01, -2.7398e+00, -2.3461e-01,  3.4103e-02,
        -6.4009e-01,  1.0692e+00,  7.3473e-01, -4.9765e-01,  1.5629e-01,
        -1.2330e+00,  5.1352e-01,  3.4415e-01,  6.8801e-01, -1.1020e+00,
         1.6789e-01,  3.2490e-01,  6.2509e-01,  6.4077e-01, -1.6743e+00,
        -3.9184e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1262e-01,  7.9456e+00, -2.8828e-01, -1.2287e-01, -2.9033e-01,
         3.3788e-02, -1.1915e-01, -1.6773e-01, -1.9506e-01, -1.2023e-01,
        -4.7568e-02,  3.8872e-03,  4.5535e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3991e-01,  1.2244e+01,  4.3075e-02, -2.1688e-01, -4.9068e-01,
        -2.8182e-01, -3.9823e-01, -7.6733e-02, -1.3170e-01, -2.3274e-02,
        -7.1715e-02, -2.3025e-01, -4.0836e-02, -2.4268e-02,  1.6470e-01,
        -3.1444e-02, -7.9043e-03, -2.9425e-02, -6.3022e-02,  3.5464e-02,
        -8.2436e-02,  1.9568e-01, -2.7604e-02, -5.2621e-02,  2.7563e-02,
         7.9021e-02, -9.6068e-02,  5.4563e-02,  4.2441e-02,  2.0720e-03,
         3.1446e-02,  3.6844e-03,  5.5406e-02, -3.4124e-02,  9.4398e-02,
        -2.8963e-01,  2.4180e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4554e-01,  4.8907e+00,  4.3713e-01, -6.1537e-04, -1.3291e-01,
        -8.2249e-02, -1.7393e-02, -1.4642e-02, -4.2571e-02, -4.6350e-02,
        -6.9288e-02,  1.5115e-02,  5.3928e-02, -1.3930e-02, -3.3972e-02,
         2.5418e-02,  2.4358e-02, -1.7168e-02, -2.1869e-02,  1.8225e-02,
        -3.9845e-02, -2.2319e-02, -1.4698e-02,  1.3317e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7180e-01,  2.2350e+00,  3.9876e-02,  4.2473e-02, -6.0471e-03,
         1.9965e-02, -2.8179e-03,  1.2338e-02,  2.6817e-02, -4.2226e-02,
         1.3411e-02, -2.6090e-02,  4.1320e-02, -1.0172e-02,  9.5529e-03,
        -1.7083e-02, -1.6814e-02, -1.1183e-02,  2.1769e-03,  3.9792e-03,
         1.0463e-02, -3.8907e-02, -2.8528e-02, -1.6217e-02, -2.6814e-02,
         3.7061e-03, -5.5026e-03, -9.5692e-03, -3.4517e-03, -1.5952e-03,
        -1.2905e-02, -3.9048e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3290e-01,  6.2791e+00,  1.3974e-01, -6.9888e-02,  3.3446e-03,
        -3.4353e-02, -1.0560e-01,  3.9437e-02,  3.4457e-02, -6.6442e-02,
         1.3215e-02, -1.6869e-02, -9.7763e-03,  1.2375e-02, -5.9523e-03,
         1.4020e-02, -4.8773e-02, -1.4634e-02, -3.1254e-02, -3.6333e-02,
        -8.2736e-02,  5.2912e-02, -3.1160e-02, -4.6035e-02, -1.2461e-02,
         1.0121e-01,  1.4876e-01, -5.6815e-02, -1.2496e-02, -1.0102e-01,
         6.7010e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1586e-02,  3.9780e+00, -4.1307e-01, -2.0897e-02,  1.1975e-02,
         2.8767e-02, -2.0153e-02, -5.8689e-02,  5.0106e-02,  3.6731e-03,
         5.1821e-02, -3.9071e-02, -7.2483e-02,  3.7920e-03,  2.4939e-02,
         5.1182e-02,  1.7430e-02, -1.2260e-02,  2.3677e-02, -3.4994e-02,
        -4.7078e-02, -7.9465e-02, -2.8325e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6317e-01,  1.5752e+01,  9.5233e-01,  9.5071e-02,  4.6265e-01,
         8.4324e-02, -8.7085e-02, -2.7505e-02, -4.9166e-02,  2.1234e-02,
         1.0509e-01, -5.8175e-02, -8.9200e-02, -2.0757e-01,  2.1737e-01,
         5.9452e-02,  7.0233e-02, -5.5111e-02,  2.2848e-02, -4.8950e-02,
        -3.4727e-01, -1.3697e-01,  4.2116e-02,  3.4111e-03, -9.7365e-02,
         1.0268e-01,  4.8542e-01,  3.0829e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.8910e+00,  1.0083e+02, -2.0358e-01, -1.0057e+00, -2.1025e+00,
         6.5193e-02,  2.2895e-01, -2.2849e-01, -8.1042e-01, -1.3091e+00,
        -1.7624e-02, -5.3376e-01, -3.8895e-01, -1.1217e-02, -5.9924e-01,
         2.1358e-02, -7.0988e-01,  5.0510e-01, -4.5773e-01,  1.7493e-01,
         8.8525e-01,  5.0421e-02, -7.2550e-01, -4.0494e-01, -8.0386e-02,
         8.0655e-02,  4.3016e-01, -1.1807e+00,  9.9032e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7216e-01,  8.7305e+01,  8.6935e-01, -1.3392e+00, -9.4537e-01,
         1.0095e+00,  3.2704e-01, -5.0747e-01,  2.1025e-01,  1.4205e+00,
        -4.1521e-01, -5.8695e-01, -1.2257e+00, -9.6956e-01, -4.9408e-01,
         3.4115e-02,  7.2192e-01,  2.7591e-01, -1.4346e+00, -8.8775e-01,
        -8.4887e-02,  7.8965e-01, -1.4250e+00,  4.1228e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0071e+00,  1.1553e+02,  3.9386e+00,  6.1519e+00,  3.9768e+00,
         2.5315e+00,  2.3989e-01, -6.3810e-02,  2.7725e+00,  1.7307e-01,
         1.3973e+00,  4.2303e-01,  9.6834e-01, -7.8642e-01, -2.1167e+00,
         9.2178e-01, -8.9945e-01,  2.9352e-01,  4.1185e-01,  2.8317e-01,
         7.7538e-03, -1.3638e+00, -1.0293e-02,  1.2272e-01, -1.1741e-01,
        -5.5324e-01, -1.7937e+00, -2.8723e-03, -3.3325e-02,  4.2984e-01,
        -1.1693e+00,  2.9655e+00, -1.3032e+00, -3.4586e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7945e+00, -1.0003e+02,  1.5073e+00,  2.7386e+00, -2.1265e-01,
        -6.4064e-01, -2.3371e+00, -3.1740e-01, -1.0628e+00, -7.5986e-01,
         3.4296e-01,  9.0350e-01,  3.0904e-01,  7.6133e-01,  3.0871e-01,
        -2.9624e-01, -2.6069e-01, -5.2523e-02,  7.9396e-02, -1.2524e+00,
         1.1811e+00,  1.8250e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4623e-01, -8.4773e+01,  9.0755e-01, -1.5521e+00, -8.8836e-01,
         5.7021e-01,  1.9938e-01,  3.6679e-01,  2.9270e-01, -1.5849e-01,
        -2.8738e+00,  9.4285e-02,  2.8456e-01, -8.7452e-02, -5.3501e-01,
        -1.4335e-01, -7.9466e-02, -4.8608e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2893e+00,  3.7681e+01,  1.3625e+00,  2.4732e-01,  5.1793e-01,
         3.0410e-02,  2.8968e-01, -4.8231e-01, -3.2063e-01,  3.1023e-01,
         1.2950e-02, -2.3607e-02, -5.0088e-01, -2.8353e-01, -5.5121e-02,
        -2.8797e-01,  1.2342e-01, -4.3915e-01, -5.2621e-01, -2.7835e-01,
        -4.9846e-01,  1.2265e+00, -2.0594e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8124e+00, -1.2484e+02,  1.3773e-01, -1.1588e+00,  5.3438e+00,
        -6.6772e-01,  4.5935e-01,  6.7027e-01,  2.1839e+00,  1.7310e+00,
         1.0925e+00, -5.5611e-02,  2.8854e-01, -1.1248e+00, -3.2937e-01,
        -2.1749e+00,  3.1922e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9144e+00, -1.0279e+02,  7.3025e-01,  4.4594e+00,  2.2639e-01,
         2.8384e-01, -3.1694e-02, -1.3902e+00, -2.2048e+00, -6.0245e-02,
         7.3997e-01,  4.2473e-01,  6.5963e-01,  2.9211e-01,  1.0099e+00,
        -7.9006e-01,  2.3432e+00,  2.7580e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7738e+00,  9.1661e+01,  2.1908e+00, -7.5598e-01,  8.4393e-01,
        -1.9984e-01, -1.1447e+00,  9.4687e-01, -6.2947e-01,  2.5268e+00,
        -1.8677e+00, -4.4725e-01, -1.5606e-01,  1.7533e-01, -4.8357e-01,
         2.2713e-01,  3.3920e-01,  1.0268e+00,  1.1535e+00,  2.4630e-01,
        -6.9299e-01,  3.8396e-01, -1.1801e+00,  2.8246e-02, -8.5592e-01,
        -1.7144e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2092e-01,  4.4167e+00,  3.2790e-03, -2.7038e-02,  2.6961e-02,
        -3.4324e-02, -5.6567e-02, -5.4067e-03,  7.3619e-02, -7.5969e-02,
         2.9599e-02,  1.7166e-03, -2.9417e-02, -4.0047e-02, -3.4550e-02,
        -3.5364e-03, -2.7919e-02,  7.6013e-03,  2.0031e-02, -8.0953e-03,
         3.3671e-02, -5.3303e-03,  1.5036e-02,  1.7899e-02, -1.0266e-02,
         2.8518e-02, -9.3342e-04, -2.7257e-03, -6.2160e-03,  2.2946e-02,
        -9.0049e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3417,  5.1588, -0.1546, -0.0896,  0.0055, -0.0077,  0.0543,  0.0631,
         0.0499, -0.2066, -0.0379, -0.1047, -0.0182, -0.0335, -0.0434, -0.0216,
        -0.0497, -0.1200,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.8647, -63.9548,   0.6831,   0.5541,   0.2035,  -2.6535,  -2.0882,
          0.4035,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.6428e-01,  4.1757e+00,  1.6605e-01, -4.1354e-02,  2.7178e-02,
         7.0346e-02,  5.4567e-02,  3.7598e-03,  1.8589e-02, -7.9379e-03,
         4.1808e-04, -6.2263e-02,  3.0814e-02,  3.9900e-02, -4.1657e-02,
        -2.1794e-02,  6.0028e-03,  7.7064e-03,  1.9222e-02,  3.5785e-02,
         2.0840e-02,  5.9157e-03, -5.2401e-03, -7.6759e-03, -1.3417e-02,
         1.9144e-02,  1.0262e-02, -4.0147e-03,  1.3770e-02, -1.1377e-02,
         1.0772e-02,  4.1992e-03, -1.2680e-03, -8.6576e-03,  6.0357e-02,
         7.3426e-03,  2.4587e-02,  2.8790e-02,  3.4272e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2022e-01,  1.6243e+01,  5.2304e-01, -2.4764e-02,  1.3556e-01,
        -6.6519e-02, -1.8753e-01, -3.7726e-02, -1.2808e-01, -4.7698e-02,
        -2.7399e-02,  2.5159e-03,  1.0503e-02, -6.2702e-02, -6.0641e-02,
        -7.9051e-04, -1.2435e-02, -1.1659e-01,  2.0893e-02,  2.2573e-01,
        -1.5999e-01, -2.3687e-01, -6.4649e-02, -1.7560e-02,  1.1913e-02,
         4.0160e-02, -2.2650e-01, -8.5503e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5200e+00, -5.9992e+01,  2.3452e+00, -1.7227e-01,  1.4299e-01,
         6.1900e-01,  5.1437e-01,  6.1699e-01, -3.4067e-02,  6.8670e-01,
        -1.8855e-01,  1.9134e+00, -1.3376e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6412e+00,  8.6097e+01,  7.6127e-01, -2.8638e+00,  6.4502e-01,
         8.9770e-01,  1.1341e+00,  9.8432e-01, -1.8176e-02,  7.0906e-01,
         5.9564e-03, -2.3297e+00,  3.8923e-01,  7.5616e-01, -2.6079e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9234e-01,  1.8216e+01,  4.3722e-01, -3.0568e-01,  1.7745e-02,
         2.8415e-01,  6.7254e-02,  9.2263e-02,  2.5778e-02,  1.2408e-01,
         1.5699e-01,  1.4884e-01, -1.3903e-01,  7.5871e-02,  9.8740e-02,
         1.4171e-01, -2.8141e-02, -1.4875e-01, -2.6387e-02, -1.4656e-01,
        -6.5249e-02,  3.0885e-02,  3.9694e-02,  5.9788e-02,  1.1541e-01,
         1.6685e-02,  1.4817e-02,  1.0973e-01, -1.7052e-01,  2.1671e-02,
        -1.0007e-01,  5.4455e-02, -1.3033e-01,  2.5869e-03,  2.0143e-01,
         7.5013e-02,  8.0212e-03,  3.8162e-02, -4.2200e-02,  2.1155e-02,
         4.2433e-02, -1.3000e-01,  3.8402e-02,  2.1767e-03,  7.8045e-03,
        -1.1669e-01, -4.7434e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8670e-01, -9.7245e+00,  1.9896e-01,  6.1336e-02, -2.8781e-02,
         5.8000e-03,  3.8190e-02, -1.5311e-02, -1.0601e-01,  2.1496e-03,
        -2.1990e-02, -7.1130e-03, -3.9255e-02,  6.4395e-02, -1.0874e-02,
        -2.5459e-02, -4.6502e-03, -5.8755e-03, -2.4145e-02,  1.6368e-02,
        -1.8432e-02,  5.3325e-02, -2.0663e-02,  1.7341e-02, -3.3685e-02,
        -8.0180e-03, -8.2816e-02, -8.9196e-03,  3.2761e-02,  8.8220e-03,
         3.1729e-02,  2.1969e-02, -1.6127e-02,  6.7889e-04, -1.3806e-02,
        -5.7396e-03, -1.8838e-02, -2.9919e-02, -8.5599e-03,  4.3637e-03,
         1.8049e-02,  2.8869e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0238,  9.3425,  0.4396,  0.1503,  0.1977,  0.0177,  0.2508,  0.0852,
         0.0639, -0.1822, -0.0158,  0.1416,  0.0166,  0.0663,  0.1043, -0.0384,
         0.3769,  0.0759,  0.0491,  0.0379, -0.0831,  0.0325, -0.0143, -0.0779,
         0.0206,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0741e-02,  1.5747e+00,  5.9218e-02, -3.2825e-03,  1.2565e-02,
        -6.8260e-03,  7.9989e-02,  4.3389e-03,  1.3506e-02, -2.3519e-04,
         1.4917e-03,  1.4457e-03, -4.0201e-03,  3.7514e-03, -1.2502e-02,
        -1.4997e-02,  2.7594e-02,  2.1906e-02, -4.2229e-03,  6.6717e-03,
        -1.1909e-02, -9.9559e-03, -3.0796e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.1715, -13.7987,  -0.3952,  -0.0225,   0.0887,  -0.0627,  -0.1054,
         -0.0306,  -0.0514,  -0.2537,  -0.2117,  -0.0306,  -0.1250,   0.0517,
          0.1564,   0.0576,   0.1501,  -0.0188,  -0.1460,  -0.0714,  -0.0600,
          0.0587,   0.1829,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7871e-01,  2.8011e+00, -1.6308e-02, -6.9416e-02, -2.0319e-02,
         4.8545e-04,  9.5538e-02,  9.3112e-02,  1.1837e-02, -1.2664e-02,
        -2.2778e-04, -1.6952e-02, -4.3964e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -2.2290, -118.8592,    2.8815,    1.2295,    0.9120,    1.1034,
           2.3777,   -0.7792,   -1.4423,    0.8257,   -1.6511,   -1.4961,
           1.8105,   -0.9188,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0123e-01, -2.9161e+01, -3.2642e-01,  4.4575e-01, -4.6251e-01,
         4.8739e-02,  1.2444e-01, -1.6372e-01, -2.1805e-02,  1.4925e-01,
         3.5175e-01,  1.4606e-01,  4.8779e-02,  1.5148e-01, -5.6853e-02,
         2.6675e-02,  2.5313e-01,  3.3763e-01, -1.2926e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 5.9981e-01,  3.6973e+01,  3.6847e-02, -9.0997e-01,  3.7519e-01,
         8.8685e-01,  7.2817e-02,  6.4782e-01,  1.6003e-01,  7.4754e-01,
         5.8300e-02, -1.2174e-01, -3.6905e-02,  1.1191e-01,  4.2647e-02,
        -4.9199e-01,  2.4931e-01,  1.2194e-01,  9.7465e-02,  1.2987e-01,
        -1.3429e-01, -5.6365e-02, -1.4101e-01,  2.5787e-01, -8.6557e-02,
        -2.3610e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2410e+00,  2.6922e+01, -9.7619e-01,  2.5989e-02,  9.0151e-01,
        -2.8826e-01, -1.2165e-01, -2.4416e-01, -2.0242e-01, -2.5636e+00,
        -7.7198e-02,  2.3703e-02,  5.3569e-01,  4.5849e-01,  1.9902e-02,
        -1.0801e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6547e-01,  4.1802e+01,  9.9621e-01,  2.5869e+00,  7.4183e-02,
         3.4198e-01,  1.3578e-01,  1.6002e-01, -1.5321e-01,  1.7193e-02,
        -3.7104e-01,  1.6766e-01,  2.5231e-01,  4.0208e-01, -1.6850e-01,
         3.5075e-01,  1.2203e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9661,  8.4207,  0.0241,  0.0815, -0.3033,  0.1717, -0.0700,  0.0471,
         0.1381,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4156e+00, -3.0447e+01,  1.2386e-01,  7.2976e-01,  1.3665e+00,
        -3.2296e-01, -3.7124e-01, -2.5786e-01, -4.9211e-01, -9.2523e-02,
        -1.0184e+00, -3.3179e-01, -6.2415e-02, -1.3324e-01, -3.7201e-02,
         1.9348e-01, -1.9363e-01,  1.7678e-01,  2.1286e-01,  1.8285e-01,
         4.4204e-02, -1.4398e-01,  3.4527e-02, -4.1664e-02,  1.2024e-01,
         2.7738e-02,  4.8814e-01, -7.5492e-02, -1.4275e-01, -4.0778e-02,
        -2.6100e-02, -1.0935e-01, -2.9465e-01, -1.0481e-01,  4.0618e-02,
        -5.4885e-02,  9.0390e-02, -3.9033e-01,  1.3638e-01,  4.8295e-02,
         2.4876e-02, -6.4945e-02,  6.4916e-02, -9.7159e-02, -2.4416e-01,
        -7.4328e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2813e-01,  3.9518e+01, -5.6211e-01, -1.0736e+00,  2.8004e-01,
        -4.7096e-01,  6.7627e-02, -2.3379e-01, -1.9811e-01, -5.3038e-02,
        -2.6033e-01, -5.5658e-02, -1.6612e-01,  6.5283e-02,  1.6248e-01,
         1.5406e-01,  1.0812e-01,  9.6040e-02, -2.1389e-01,  2.4839e-01,
        -3.5977e-01, -5.7723e-02, -4.0849e-01,  5.1710e-03, -8.4405e-02,
        -7.5019e-02,  3.1682e-01, -1.4807e-01, -1.2256e-01,  6.9876e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.6572, -85.3600,  -1.3663,  -4.3500,   1.6116,   0.2585,   2.2189,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0502e-01, -1.9975e+01, -1.1101e-01,  3.4887e-01,  4.7771e-02,
        -8.1386e-02, -7.2147e-02, -2.9786e-01, -4.9785e-04,  5.9109e-02,
        -4.8340e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9351, 43.8054,  1.8325,  0.6641, -0.2044, -0.6009,  0.5194, -0.1671,
         0.5791,  0.2889,  0.0683,  0.2719,  0.1157, -0.4906,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5138e+00,  8.2950e+01, -1.7028e+00, -1.9306e+00, -2.8618e-02,
        -3.0678e-01, -1.4315e+00, -7.6826e-01, -2.6840e-01, -5.5584e-01,
         2.0963e-01,  2.8137e-01, -9.4724e-01, -1.1390e+00,  1.2558e-01,
         2.1587e-01, -1.3776e-02, -1.2983e-01, -2.1864e-01,  1.5833e+00,
         4.8547e-01,  8.3491e-01, -2.1209e-01,  3.1893e-01, -1.4362e-01,
         4.6107e-01,  4.7147e-01,  1.7747e-01, -9.2258e-02, -9.2570e-02,
        -1.0974e+00,  1.0035e+00,  4.4988e-01,  2.2981e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6951e+00,  6.5434e+01,  4.1502e+00,  5.0650e-01,  1.1251e+00,
        -3.3748e-01, -2.0333e-01, -6.5650e-01, -6.9259e-01,  3.3451e-01,
         6.4600e-01, -6.0509e-01, -2.1638e-01, -5.2607e-03, -1.0302e+00,
        -4.4760e-01, -1.0018e-01, -1.0028e+00,  3.2924e-02,  8.7743e-02,
        -2.4909e-01,  2.5148e-01, -4.2021e-01, -1.9438e-02,  3.4163e-01,
        -1.4412e-01, -7.6031e-01, -2.7187e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2331e-01,  3.2588e+01,  6.6225e-01, -3.9030e-01,  1.5062e-02,
        -1.9390e-01,  2.2202e-01,  1.2071e-01, -5.3831e-02,  6.1479e-02,
         2.0584e-01, -2.6394e-01,  6.2036e-02, -5.5429e-01,  3.5846e-01,
        -1.3337e-01, -5.3558e-01, -2.0860e-01,  4.7037e-01,  2.5013e-01,
        -1.9849e-01,  2.5953e-01, -5.7136e-01, -4.5490e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 7.6586e+00,  6.4925e+01, -1.6378e+00, -5.5607e-01,  8.6876e-02,
        -4.7551e-01,  2.5158e-01, -5.7717e-01,  1.1533e+00,  3.7751e-01,
        -1.4094e+00, -1.2711e+00,  5.1819e-01,  5.8007e-01,  5.4740e-02,
        -8.0652e-01, -1.3351e-01, -5.9187e-01, -9.3461e-02, -1.9017e+00,
        -3.5177e-02,  2.0056e-01, -8.8542e-01,  9.6199e-01, -2.2357e-02,
         6.9886e-01, -9.5656e-01, -5.8607e-02,  1.5816e-01, -3.6204e+00,
         2.1339e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9818e+00,  5.8228e+01, -1.5152e+00, -4.3570e-01,  1.3402e+00,
        -7.9255e-01, -2.2588e-01,  1.0983e-01,  1.3295e+00, -1.2550e-01,
        -1.2571e-01,  3.5015e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5694e+00,  8.4554e+01,  2.0064e+00,  7.8251e+00,  7.2344e-01,
        -1.4890e-02, -3.9033e-01, -1.4246e-01,  4.1168e-01, -7.4729e-01,
        -1.1540e+00, -2.2154e+00,  6.7140e-01, -6.9357e-01,  3.6792e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6824e+00,  1.0446e+02, -1.0794e+00,  1.0928e+00,  2.1159e+00,
         2.9498e-01,  3.1166e+00,  1.5042e+00, -4.2953e-01,  9.2221e-01,
         6.8260e-02,  9.1719e-01,  2.5877e-01,  9.8337e-02, -1.5658e-01,
        -2.7109e-01, -6.5822e-01,  2.1091e-01,  1.3266e+00, -7.7159e-01,
        -1.0725e-02, -2.7481e-01,  1.5236e-01, -2.4611e-04, -7.1262e-02,
         1.0478e-01,  6.9533e-01,  2.1226e-01, -1.0737e-01,  3.2585e-01,
        -1.4072e-01, -3.7546e-01,  2.9748e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5967, 51.4573, -0.7736, -0.2876, -0.2493, -0.6573, -0.4512, -0.8040,
        -0.1698, -0.0518,  0.0887,  0.6296, -0.9085, -1.9362, -0.8962,  0.4166,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2654, 42.2188,  1.1385,  0.7344, -0.3929,  0.3808, -0.7852, -1.9730,
         3.3014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6270, 12.1744, -1.3849, -0.0382,  0.0787,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0720e+00,  9.4111e+01, -2.4404e+00,  4.4011e-01,  3.2086e-01,
        -6.1476e-01, -2.7628e-01, -1.0176e+00,  1.5051e+00,  1.2399e+00,
         1.8918e-01, -6.1407e-01,  2.8686e-01, -2.0661e+00, -4.6494e-01,
         2.5586e-02,  7.6138e-01,  3.5804e-01, -1.6436e+00,  1.3674e-01,
        -2.1327e-01,  4.0905e-01, -2.5415e-01,  9.4330e-01,  1.7904e-01,
         4.8299e-01, -1.3831e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6999, 55.2391, -0.1565,  0.1135,  2.3306,  3.1251,  1.7150,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9845e-01,  1.1397e+02, -3.8755e-01, -2.8526e+00,  8.6488e-01,
         1.0613e+00,  4.9509e+00, -1.3226e+00, -1.7228e+00,  9.9171e-02,
         4.4787e-02,  1.7153e+00, -2.6142e-01,  7.8267e-01,  2.1770e+00,
         5.6957e-01, -3.9223e-01,  6.1811e-01, -2.0394e+00,  2.1432e+00,
         3.0189e+00,  2.7795e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5286, 42.8109,  0.0969,  0.1763,  0.3034,  0.1704, -0.2066,  1.0578,
        -0.2736, -0.2300,  0.4007,  0.2890, -0.3130,  0.6951, -0.1174,  0.3076,
        -0.0989, -0.1923, -0.9255,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5312e-01, -8.9495e+01, -1.4277e+00,  9.3058e-01,  1.2192e+00,
         1.4801e+00, -1.8839e-01, -1.1694e+00,  5.9741e-02,  3.1787e-01,
        -1.6513e-01,  9.3211e-02,  1.5193e-01,  6.5017e-02,  5.2551e-01,
        -6.6196e-02, -6.9496e-02, -1.8526e-01,  4.1992e-01, -4.4593e-02,
        -5.2640e-01, -5.4790e-01,  7.1713e-02,  1.8327e-01, -7.7911e-01,
         3.5129e-01,  5.1611e-01,  7.6776e-02, -1.1920e+00,  9.5429e-01,
         2.3064e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-3.0300e+00,  1.0515e+02, -5.7620e+00, -1.6074e+00, -1.9956e+00,
        -1.6163e+00,  1.0964e-01, -1.6356e-01,  2.7317e-01,  2.7529e-01,
         6.6445e-01,  3.6094e-01, -1.0376e+00,  1.0301e+00,  9.2126e-01,
         1.9660e-01,  2.4552e+00, -3.9055e-01, -6.5183e-02,  3.0092e-01,
        -2.5513e-01, -6.4404e-01,  2.8584e-01, -9.3221e-01, -9.0411e-01,
        -4.5865e-01,  1.4676e-01, -1.3037e+00, -7.8779e-02,  2.0675e-01,
         2.6434e-01, -1.7873e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9733e-01,  7.5669e+01, -1.1434e+00,  3.4645e-01, -9.1148e-01,
        -3.1395e-01, -1.5702e+00,  1.0756e+00,  4.9256e+00, -7.4447e-02,
        -3.9819e+00,  5.3428e-01, -4.4102e-01,  8.5518e-01,  2.2196e-01,
        -8.3053e-01, -3.3728e-01, -1.5031e-01,  2.5459e-01,  1.2773e-01,
         6.6468e-01,  5.8857e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3010e-02,  1.0542e+01,  2.8856e-01,  4.6211e-02, -2.8446e-02,
         4.1926e-02, -2.1628e-01,  5.0126e-02,  4.4893e-03,  8.2999e-03,
         1.0213e-01,  4.5674e-02,  1.3867e-01,  5.5059e-02,  9.1000e-03,
         1.1870e-02, -3.7219e-02,  6.2548e-02, -7.1928e-02, -1.6577e-03,
        -3.2129e-02, -5.0486e-02, -3.8322e-02, -5.3616e-02, -6.4727e-02,
        -7.8937e-02, -1.6038e-02,  2.6472e-02,  1.2539e-01,  1.3324e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6377e+00,  2.5702e+01, -1.7463e+00, -9.0389e-01, -7.5218e-02,
        -2.7543e-01,  9.9367e-02,  3.1346e-02, -4.4000e-01,  1.6734e-01,
        -3.8798e-01,  2.0347e-01, -3.9664e-01,  5.5851e-03,  5.0164e-02,
        -6.9152e-02, -8.3613e-01, -6.1292e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2260e-01,  9.3512e+01,  9.6856e-01,  2.4227e+00,  7.8021e-01,
        -2.3178e-01,  6.8200e-02,  1.1833e+00,  1.6340e+00,  4.2217e-01,
        -3.5260e-02, -1.3425e-01,  1.4640e-02,  3.4456e-01, -4.5639e-01,
        -8.6500e-02,  2.1416e-01,  2.4013e+00, -1.1366e+00,  1.2563e-01,
        -1.5311e-01,  2.1304e-01,  4.6839e-01,  1.6758e+00,  7.3333e-01,
        -5.6845e-02,  4.2120e-01, -1.0603e-01, -2.5046e-02,  1.4628e-01,
        -1.9917e-01,  3.0040e-01,  3.0350e-01,  7.9449e-01, -2.2713e-02,
        -7.4855e-01, -2.4057e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0210e+00,  1.0659e+01, -4.0643e-01,  1.4671e-01,  1.6682e-01,
        -6.3668e-02, -4.0917e-02, -2.9870e-02,  3.5744e-03, -4.5677e-02,
        -4.6960e-02,  1.4475e-01,  1.6525e-02, -1.6130e-01, -1.4654e-01,
        -5.9732e-02,  6.6496e-02, -8.0152e-02, -3.1065e-02,  1.1166e-01,
         1.4772e-01, -4.1482e-02, -5.3441e-01, -1.2558e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6285e-01,  1.6226e+01,  5.5338e-02, -3.1103e-01,  2.7039e-01,
         7.1849e-02, -1.2292e-01,  1.3688e-01,  4.0527e-02, -1.2299e-01,
        -8.1016e-02, -9.5337e-02, -6.6178e-02,  1.5282e-02, -1.5068e-01,
         3.0829e-02, -7.2338e-02, -7.3652e-02, -4.0869e-03, -1.1215e-01,
        -1.4060e-02, -4.7561e-02, -2.7086e-02,  6.5396e-02,  1.2191e-02,
         3.1696e-01, -4.5344e-02,  1.1518e-02,  2.0057e-02, -2.5196e-02,
         1.6610e-01, -1.0983e-01, -1.7911e-02,  5.9450e-03,  7.2568e-02,
        -5.8023e-02, -1.4311e-02, -5.8479e-02, -6.9183e-03,  7.4802e-03,
        -2.0359e-02,  8.7505e-03, -4.3013e-02,  1.4201e-01, -5.8766e-02,
        -8.5566e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0252e-02,  1.8910e+00, -3.2638e-02, -3.6359e-03, -9.8249e-03,
         2.0545e-02, -4.7401e-02, -2.8636e-03,  1.4353e-02, -2.9976e-02,
         3.4593e-02,  1.8422e-02,  3.0948e-02, -8.8039e-03,  8.7209e-03,
         4.7070e-03,  1.4084e-03,  3.8369e-02, -1.6444e-02,  8.3911e-03,
         5.6025e-02,  1.8243e-02,  1.3393e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3177e+00,  5.1717e+01, -5.0800e-02, -2.2193e-02,  1.9558e-01,
         6.0184e-01,  2.7662e-01, -1.3149e-01, -3.1539e-01,  1.5435e-01,
         2.5316e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0393e+00,  6.7283e+01,  3.3090e-02,  2.2971e-01, -8.7524e-01,
        -7.3528e-01,  2.9633e-01, -7.4380e-01,  1.7575e+00,  9.7719e-01,
         3.7289e-01, -1.5744e-01,  1.7423e+00, -1.9395e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2190, 18.9163, -0.2266, -0.7255, -0.3256,  0.0314, -0.3126, -0.1880,
         0.0499,  0.1544,  0.4599, -0.1963,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0740e+00, -1.0408e+02, -4.2973e+00, -4.2836e+00, -6.6797e-01,
         1.7192e+00, -1.2202e+00, -1.9226e+00, -3.3684e-01, -1.5176e-01,
         3.4089e-01,  3.5073e-01,  1.5913e-01, -3.0733e-01,  3.9223e-01,
        -3.8506e-02,  5.3026e-02,  2.3023e-01, -3.1144e-01,  3.0070e-01,
         1.5740e-01, -8.0356e-02, -2.7976e-01,  7.1231e-02, -1.8499e-01,
        -2.1739e-01, -1.5994e+00,  1.5421e-01,  4.7697e-01,  5.2862e-01,
         1.4740e-01, -1.0569e-01, -1.5331e-01, -5.3693e-01, -1.6518e-01,
         1.6179e-01,  3.4924e-01,  1.0942e-01,  2.6963e-01, -6.1467e-04,
        -1.5030e-01,  4.7084e-02, -1.2775e-01, -3.9994e-02,  3.7267e-01,
         6.2741e-02, -3.5389e-01,  4.2165e-01,  4.2194e-01, -1.3343e-02,
        -1.5977e-01,  2.4790e-01,  6.1128e-01,  9.5347e-02, -1.0521e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-3.5542e+00,  7.2995e+01,  2.1168e+00, -2.0219e+00,  1.2339e+00,
         5.8509e-01,  3.0479e-02, -2.4248e-01, -1.4181e+00, -8.2392e-02,
         2.6600e-01, -1.7605e-01, -8.4278e-02, -7.2029e-01, -4.6064e-02,
         1.7973e-02, -3.2918e-01,  2.1773e+00,  6.1520e-01,  1.0455e+00,
        -2.8487e-01, -2.3622e-01,  2.6795e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.2345, -27.1436,   0.8088,  -0.4341,  -0.0879,   0.3292,   0.0763,
         -0.0675,   0.5025,   0.3493,   0.0584,  -0.0955,   0.2938,  -0.1303,
         -0.1189,   0.4238,   1.4202,   0.1506,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1508e+00,  3.5645e+01, -4.3227e-01, -5.4738e-01,  9.7809e-01,
        -1.2247e-01,  2.7006e-01,  1.0201e+00, -3.1066e-01,  1.8893e-01,
         2.2454e-01, -2.8041e-01, -2.1033e-01,  2.3182e-01, -2.3415e-02,
         5.8295e-01, -1.2152e-01,  6.8110e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3456e-02, -6.0994e+01,  2.7894e+00,  5.9943e-01, -7.1880e-01,
         3.6982e-01,  1.2507e-02,  4.9582e-01,  5.2555e-01,  4.4293e-02,
        -2.4431e-01, -3.6383e-02, -6.1173e-01, -1.4468e+00,  1.6251e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6247e+00,  3.9734e+01, -6.0820e-01,  2.5952e-01,  6.7142e-02,
        -1.0084e-01,  1.0265e-01, -1.8350e-02,  1.5149e-01, -1.9092e-02,
         1.4636e-01,  1.8456e-02,  2.6826e-02,  1.6856e-02, -8.7958e-02,
         5.3644e-01, -3.8009e-01, -2.2894e-01,  7.8195e-04, -5.2812e-02,
         1.5451e-01, -1.7248e-01,  1.4406e-01, -8.6602e-02,  4.7798e-01,
         6.4251e-02,  6.1654e-01,  3.7614e-02, -6.4464e-02,  2.6668e-02,
         2.5579e-01, -2.0712e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8402e+00,  6.0342e+01,  1.3778e+00,  6.1749e-02,  3.5748e-01,
         9.2067e-02, -1.5952e-01,  5.3918e-01, -1.2134e+00, -4.4970e-01,
         1.7740e-03, -2.9185e-01, -4.5507e-01, -9.2466e-01, -7.5640e-02,
         1.5918e-01, -4.5552e-01, -5.0540e-01,  2.5044e-01, -4.6128e-02,
        -3.5439e-01,  2.2018e-01, -5.4315e-01,  4.3054e-01, -1.4200e-01,
         3.8224e-01, -1.4741e-01, -1.8241e-01,  6.9280e-02, -1.5759e-01,
         3.4153e-04,  5.6214e-01,  1.9831e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4207e-01,  4.5656e+01,  1.3480e+00,  9.0199e-01,  1.0459e+00,
         3.9341e-01,  1.4925e-01, -3.5721e-01,  5.4411e-01,  6.3983e-03,
        -1.3839e-01, -4.7607e-01,  1.7643e-01,  1.5235e-02,  1.9941e-01,
         1.0987e-01,  4.7816e-02,  1.5838e-02, -1.9256e-01,  2.8186e-02,
        -3.4834e-02, -1.7295e-01, -7.4583e-02, -7.2191e-02,  4.8369e-02,
         2.0124e-02, -4.3955e-02, -7.8756e-03, -2.6221e-02,  1.3045e-01,
         1.3380e-02,  1.0515e-02, -1.7696e-02,  1.3039e-01, -1.3212e-01,
        -1.3525e-01, -2.0088e-01, -1.5231e-01, -5.6927e-02,  3.2781e-02,
         1.4824e-02,  9.5962e-02, -6.0020e-02, -1.1260e-01,  1.2685e-01,
         4.0621e-02, -3.8751e-02, -1.1602e-01, -6.3978e-02, -1.8332e-01,
         1.6062e-01, -1.9841e-03, -3.8986e-02, -1.0904e-01,  5.6338e-02,
        -1.4201e-01,  7.0266e-02, -2.8543e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3987e-01,  1.6323e+01,  2.3801e-01,  2.2731e-01, -2.1393e-01,
         1.0809e-01, -5.7803e-02, -2.0080e-01, -8.9610e-02,  2.6012e-01,
        -2.1353e-01,  1.1978e-02,  3.7554e-01, -4.5130e-02, -3.0267e-02,
        -3.4977e-02,  1.0314e-01,  1.2514e-03, -8.1069e-02, -9.8489e-02,
         2.6228e-02,  2.4705e-02, -6.4909e-02, -3.7823e-03, -2.7248e-01,
        -2.4104e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -11.4218, -131.3949,    2.4568,    3.3902,    1.0490,    1.5874,
          -0.4914,    0.5141,    0.5346,    0.8058,    0.2028,   -0.3988,
           0.7590,   -0.4680,    0.8274,   -0.1878,   -0.2361,   -0.1534,
          -0.3835,   -0.3514,    0.5739,    1.8372,   -0.6412,   -0.2724,
           5.3004,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2072e-01,  1.9160e+01,  3.6883e-01, -2.8004e-02, -5.6110e-01,
        -2.0262e-01, -2.0083e-02, -3.6480e-02,  1.1151e-01,  1.4876e-01,
         9.2671e-02, -1.7208e-01, -1.0187e-01,  2.1421e-01, -3.5619e-02,
         2.9573e-02, -1.0083e-01,  1.9358e-01, -2.2383e-02, -3.4692e-02,
         3.6464e-02,  1.3130e-01, -9.0874e-02, -1.3921e-01,  2.0949e-02,
        -3.0016e-01, -1.4496e-01, -5.3882e-03, -2.6596e-02, -6.8757e-02,
         1.2869e-02, -1.3640e-01, -8.6432e-02,  2.5398e-02,  1.7507e-02,
        -7.1413e-02, -6.8739e-02, -1.4458e-01, -5.0312e-02,  1.3493e-02,
        -9.2962e-02, -6.6411e-02,  1.0060e-01, -6.8595e-03,  2.0573e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3788e-01,  3.7537e+00,  6.7452e-02,  2.5684e-02,  4.1808e-02,
        -2.2400e-02,  2.8626e-02,  2.5149e-02, -1.0197e-01, -1.1592e-02,
        -6.0422e-03, -1.7337e-02,  1.8108e-02,  1.6381e-02,  8.4370e-03,
        -8.9128e-02,  3.6271e-03,  2.8362e-02,  3.2697e-02,  3.6300e-02,
         8.5376e-03,  5.5826e-02,  3.4721e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0703,  1.3094, -0.0045,  0.0214, -0.0052,  0.0062, -0.0576,  0.0084,
         0.0178,  0.0025, -0.0016,  0.0464,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 2.5718e+00, -8.7422e+01,  1.4726e+00,  1.4878e+00,  7.9140e-01,
        -6.6070e-01,  5.6273e-01,  1.2099e+00, -3.2321e-01,  1.5645e+00,
         2.7025e+00,  1.1400e-01,  2.2514e-01, -3.3657e-01,  6.9031e-01,
         1.1410e+00, -2.3255e-01, -1.2628e+00, -2.3631e-03, -4.8477e-01,
         4.6755e-01,  1.5226e+00,  1.5020e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0843e+00,  5.4965e+01,  6.3267e-01, -8.4100e-01,  1.0221e-02,
        -6.6625e-01, -3.1438e-01,  8.4853e-01, -1.1701e-01, -6.8203e-02,
         3.0119e-02, -2.7790e-01, -1.6143e-01,  6.8693e-02, -1.4985e-01,
        -2.0091e-01, -4.4656e-01, -2.1036e+00, -1.4292e-01, -2.7002e-02,
         3.2796e-01, -3.0195e-01, -8.6104e-02, -3.4721e-01, -2.4308e-01,
        -1.3179e-01, -4.7754e-01, -2.8661e-02, -6.0690e-01, -4.8654e-02,
        -1.0806e-01,  3.5998e-01,  7.8622e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1087, 26.2250,  0.1485, -0.4650, -0.3417,  0.0424, -0.2727,  0.0403,
         0.0277,  0.1033, -0.0263, -0.1999,  0.0884, -0.1503,  0.1257, -0.2248,
        -0.2599,  0.2147, -0.2274,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1017e+00,  6.1584e+01, -3.2428e+00, -9.8892e-01, -3.0351e-01,
         2.3658e+00,  3.1723e-01, -8.5962e-01,  4.5065e-02, -4.1372e-01,
        -3.1018e-01, -4.8639e-01,  9.0315e-01,  9.4420e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6061e+00, -1.2634e+02, -1.1089e+00,  1.4824e-01, -8.1860e-02,
        -1.3242e+00, -1.0719e+00, -7.2560e-01, -1.1704e+00, -1.1959e-01,
        -2.8427e-01, -3.5016e-01, -2.8704e-01, -2.4433e+00, -6.9099e-01,
         5.2425e-01, -1.2304e+00, -3.1755e-01, -6.7962e-01,  7.6467e-02,
         7.1419e-01, -9.0934e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1814e-01, -3.0507e+01, -2.7434e-01,  1.4162e-01, -1.1877e-01,
        -4.5175e-01,  2.3179e-01,  9.5929e-01,  1.1177e-02,  8.1598e-02,
        -1.1472e-01,  8.2406e-02,  4.1682e-01,  1.6918e-01, -2.6802e-01,
        -1.4475e-01, -2.1241e-01, -2.1530e-01,  1.1170e-01, -4.7305e-01,
        -1.1713e-01, -4.6799e-02, -1.5018e-01,  2.4074e-01,  8.1130e-02,
        -1.2515e-02, -4.7108e-02, -4.2515e-02, -4.7295e-01, -1.1736e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8648e-01,  1.4328e+00, -3.3741e-02, -5.9447e-02, -2.3235e-03,
         4.3074e-02,  2.5754e-02,  1.0186e-03,  9.2542e-03,  9.1067e-03,
        -1.6303e-02,  4.9219e-03, -1.6542e-02,  9.6878e-03,  2.7772e-02,
        -5.3866e-02, -2.8049e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0467,  4.4376,  0.0430, -0.0394, -0.0539, -0.0211,  0.0397,  0.0548,
         0.2165,  0.0227, -0.0496,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4591e-02, -4.5991e+00, -5.7209e-03,  3.8130e-03,  1.3241e-01,
         4.5049e-02, -2.3992e-02,  1.5722e-02,  2.2393e-02, -2.0929e-02,
         7.0543e-02, -3.2518e-05, -7.8273e-03, -5.4679e-02, -1.0273e-01,
        -2.5182e-01,  1.7047e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6578e+00, -7.4671e+01, -1.7275e+00, -2.2498e+00, -1.6306e-02,
         8.2597e-01, -1.1759e-01,  2.0660e-01,  6.2195e-01,  5.9827e-01,
         1.3770e-01,  2.1402e-01,  8.4171e-04, -8.3416e-01, -1.3261e-01,
        -2.4139e-01,  4.7473e-02,  4.8286e-01,  5.5427e-01,  1.4990e-01,
        -1.0589e-01,  2.1782e-02, -6.2428e-01, -3.6791e-02, -7.2170e-01,
         1.6258e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2755e-01,  5.9998e+00,  1.2935e-01, -7.8486e-02, -3.0755e-02,
         1.5333e-01, -8.5259e-03,  5.4740e-02,  8.9559e-02, -1.8012e-02,
        -1.5852e-02, -1.2240e-01, -1.7385e-02,  4.8656e-02, -4.3473e-02,
        -2.0198e-02, -8.1270e-04, -2.1492e-02, -5.1612e-03,  1.7073e-02,
        -2.3435e-02,  5.9119e-03, -3.5783e-02, -5.7787e-02,  6.8967e-02,
         1.4788e-02, -2.1333e-02, -3.9838e-03, -2.9716e-02,  2.0255e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8411e+00, -9.0063e+01,  4.3429e-01, -2.9756e-01, -5.3898e+00,
         2.7494e-01,  1.3751e+00,  7.4541e-01, -3.3570e-01,  9.2439e-01,
         1.5442e-02,  1.1618e-01, -2.2432e-01,  9.4455e-01,  5.4695e-01,
         2.5218e-01,  1.2521e+00, -3.9355e-01, -8.0919e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-2.4814e+00, -9.9154e+01,  5.3462e-01, -6.3992e+00, -3.3590e-01,
         6.8331e-01, -1.7961e+00,  3.3529e-01, -4.2265e-01,  1.9205e-01,
        -7.1241e-01,  3.2343e-01, -3.1375e-01, -7.8370e-02,  1.3750e+00,
        -6.5129e-01, -4.2555e-01, -4.5631e-01, -4.3955e-01, -1.9070e-01,
         8.4448e-01,  1.9871e-01, -1.4937e-01,  7.2910e-01,  1.7253e+00,
        -8.5425e-02,  1.1561e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9079e-02, -3.8734e+01, -8.5487e-01, -3.2762e-02, -8.4215e-03,
        -6.3649e-02,  1.0552e-01, -3.8278e-01, -1.0573e+00, -3.4118e-01,
         2.2318e-01, -2.5020e+00,  1.8852e+00,  1.1683e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8089e-01, -9.5962e+01,  1.5022e+00, -1.4742e+00, -6.7163e-01,
        -5.1042e-01, -8.1611e-01,  8.8518e-01, -2.4634e+00,  2.0682e-01,
         8.9512e-02, -8.6708e-02, -6.3611e-01, -2.1382e-01, -5.9493e-01,
         9.4499e-02, -5.1288e-02,  1.6861e+00, -1.2265e-01, -3.5803e-01,
         1.2751e-02,  4.8194e-01, -3.0429e-01, -6.5780e-01,  1.5397e-02,
         1.9921e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7513, 19.4804,  0.3616,  0.0987,  0.1276, -0.4037,  0.2123,  0.3973,
        -0.0726, -0.2911, -0.0249, -0.2633,  0.8720, -0.2535,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3702e-01,  1.6780e+01,  2.4527e-01, -2.0047e-01, -2.9429e-01,
        -1.1760e-01, -6.0209e-02, -1.5245e-01, -4.2491e-02, -2.4482e-02,
        -6.8328e-02, -2.2416e-01, -6.0112e-02,  1.3325e-01,  1.7956e-01,
         2.5031e-02,  1.2164e-01,  8.2616e-02, -6.4692e-02,  2.7566e-02,
        -3.2629e-02,  6.8078e-03,  3.4006e-02,  5.8266e-02,  6.1247e-03,
         2.6840e-02, -5.1600e-02, -2.7419e-02, -1.0097e-02,  3.1748e-02,
        -3.0547e-01,  4.2018e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0466e-02,  4.6481e+00,  1.1397e-01, -1.2181e-01, -3.5209e-02,
         7.4410e-02, -1.1277e-01, -2.9654e-02,  3.2508e-02, -1.9055e-02,
         2.7246e-03, -1.0362e-04,  5.4627e-03,  1.6502e-02,  4.3971e-02,
        -8.6021e-03,  5.2398e-03,  5.4427e-02, -1.2839e-02,  4.4107e-02,
        -1.3024e-02,  1.3609e-02,  2.0746e-02,  6.8709e-03, -2.9418e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0178,  6.2791,  0.2284, -0.1580, -0.0716, -0.1723,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4559e-02,  2.2740e+00,  1.1739e-01, -3.0766e-03,  5.6032e-02,
        -8.8241e-03,  1.9914e-02,  1.2355e-02,  1.9421e-03, -3.0602e-02,
        -1.2857e-03,  7.5283e-02, -1.8951e-03, -1.5906e-02,  5.3295e-03,
        -1.1765e-02, -1.3550e-02,  1.6632e-02, -5.4612e-03,  3.6608e-04,
        -1.8459e-02, -2.4690e-02, -5.0437e-03, -8.8811e-03, -4.3072e-03,
        -8.7200e-03,  5.0331e-04, -1.0236e-02, -5.7785e-03,  4.4412e-02,
         6.5623e-03,  1.6120e-02,  9.7264e-03,  2.7592e-03, -1.5029e-02,
         1.0503e-02,  5.4026e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9675e-01, -9.4616e+00,  3.9156e-02,  2.5675e-01,  2.3398e-02,
        -6.8167e-02, -9.0687e-02, -1.4723e-02, -4.0745e-02,  8.0958e-02,
        -8.4481e-02, -5.7811e-02,  8.9326e-02, -4.0248e-02,  4.3254e-02,
         1.0078e-01,  5.4573e-02, -3.2397e-02, -4.0015e-02, -1.1375e-02,
         1.0237e-01,  3.4313e-02,  2.8809e-03,  2.6354e-03,  2.0949e-02,
        -4.7711e-03, -1.4508e-03,  3.2381e-02,  4.6828e-02,  1.6160e-02,
         2.1055e-02,  1.5519e-02,  6.3942e-04, -8.2085e-02,  1.0142e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9406e-02,  2.0138e+00, -3.7939e-01,  4.4459e-02, -1.5434e-03,
         7.7078e-03,  7.8256e-03, -1.3343e-02,  2.0590e-02,  8.3195e-03,
        -5.4035e-02, -4.3516e-02, -4.1509e-02,  4.0438e-02, -2.8213e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8702e+00, -3.8530e+01, -2.0173e-01, -1.6473e-01,  3.6234e-01,
        -6.8074e-01,  6.1854e-02, -1.1026e-02, -1.4156e-01,  9.7775e-02,
         8.3812e-02,  1.6529e-01,  4.0846e-02,  6.3141e-03, -1.8444e-01,
         3.8651e-02, -1.3479e-01, -8.3095e-02, -8.9572e-01,  5.8699e-02,
        -1.7408e-02, -4.1798e-01,  2.8241e-01,  4.9606e-02,  3.0351e-01,
        -2.4187e-01, -1.4560e-03, -5.9935e-02,  8.6841e-02, -1.6242e-01,
        -1.1375e-01, -8.6763e-01,  2.8949e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1856e+00, -5.1203e+01,  6.7580e-01,  8.0740e-01, -8.4697e-01,
         7.2645e-01,  1.9012e-01, -1.5464e-01,  4.2708e-01,  1.7766e-01,
         5.3302e-01, -8.9971e-01, -4.3875e-01, -1.9145e-01,  6.1246e-01,
         2.2707e-01,  2.3216e-01,  4.8262e-02,  5.9856e-01,  1.3059e-01,
         2.6702e-01,  2.2677e-01, -5.2922e-01, -1.0505e-01, -2.0661e-01,
        -2.6856e-01,  7.3088e-01,  1.1005e+00,  1.6217e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 2.0512e+00,  1.1413e+02,  6.6162e-01, -2.2442e+00,  4.7227e-03,
         1.0940e+00,  1.3495e+00,  1.2379e+00,  1.4065e+00, -2.0102e+00,
         1.7760e-01,  5.8542e-01,  4.6637e-01, -2.4871e+00, -6.5473e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0650e+00,  4.0076e+01, -3.3450e-01, -2.7042e-01, -3.8601e-01,
        -3.7756e-02,  3.6290e-02,  5.2581e-02,  4.1921e-01, -9.4113e-02,
         3.5591e-01, -9.8014e-02,  2.0670e-02, -1.3212e-01,  7.2106e-02,
        -3.3662e-02,  5.8454e-02, -4.5751e-03,  9.4184e-02,  3.0668e-03,
        -1.4077e-01, -4.2348e-02, -9.0263e-02,  4.3103e-02,  1.8229e-02,
        -1.4340e-02,  2.5856e-02,  2.4172e-01,  4.8822e-02,  9.3948e-02,
         3.2678e-02, -7.1237e-02, -1.1369e-01, -3.3549e-02, -3.0585e-01,
         8.6402e-02,  1.7292e-01,  1.6039e-02,  1.2497e-01, -9.2458e-02,
        -2.2527e-01,  2.0643e-01, -2.5205e-01, -2.0251e-01, -1.2055e-01,
        -2.5911e-01, -1.2217e-01,  7.0428e-02,  7.9032e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0007e+00,  6.4847e+01,  9.6276e+00, -6.4704e-01,  7.0664e-01,
         1.8652e-01, -1.4414e-01,  1.3303e-01, -1.3515e+00, -4.3981e-01,
        -1.3782e-01,  3.4081e-02,  3.1865e-01,  1.1632e-01,  1.2164e-01,
        -1.4368e-01,  1.7847e-01,  9.7754e-02,  9.5196e-02,  4.8356e-01,
        -1.3257e-01,  5.9157e-01, -4.4563e-01,  1.3309e-01, -2.2614e-01,
        -1.2025e-01, -5.4467e-01,  7.1888e-01,  4.5388e-01, -7.2450e-01,
        -2.8268e-02,  5.3634e-01,  6.7722e-02,  1.6221e-01, -1.6714e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.2519, -16.5368,  -0.2911,   0.4385,   0.0892,  -0.2601,  -0.0318,
         -0.2307,  -0.0660,   0.0441,   0.1143,  -0.0303,   0.0486,  -0.0514,
         -0.2340,  -0.0702,  -0.1495,  -0.1420,  -0.1577,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8158e+00, -6.9967e+01,  8.7644e-01, -1.4390e+00, -4.4442e-01,
        -1.2168e+00,  7.1685e-01,  1.9223e-01,  4.7019e-01,  3.0498e-02,
         3.5986e-02, -8.7901e-01, -6.6888e-01,  2.9416e+00,  7.0626e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6192e-02,  1.6430e+01, -3.4991e-02, -1.6768e-01, -1.4177e-01,
        -1.0383e-02, -1.2238e-01,  7.4102e-03,  1.8202e-01,  7.9342e-02,
         7.4871e-02,  5.8702e-02,  7.6081e-02,  9.3503e-03,  6.8215e-02,
         7.9766e-02, -2.8671e-02, -1.9980e-01, -3.0382e-02,  1.6864e-02,
         3.5792e-01, -1.3483e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5375e-01,  2.4883e+01, -1.5487e+00, -3.6185e-01, -6.1797e-02,
         5.0430e-02, -3.7110e-01, -1.4772e-04,  6.1790e-03,  1.9009e-01,
        -1.2548e-01,  1.4813e-01, -2.2688e-02,  1.0788e-01, -7.1839e-02,
        -1.1754e-02, -3.7328e-02,  1.1599e-01, -2.3045e-01,  7.9305e-02,
        -1.5633e-01,  8.0480e-02,  1.6792e-01,  1.7307e-01,  3.5168e-01,
        -7.3877e-03,  2.1747e-02,  2.7706e-01, -1.4866e-01,  2.0059e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9186e+00,  8.2260e+01, -4.7516e+00, -2.4651e+00, -9.2563e+00,
        -2.3180e+00, -1.0620e+00, -8.9683e-02, -4.3411e-01, -5.7742e-02,
         5.3354e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5761e-02, -9.9559e+00,  5.8992e-04,  1.7881e-01,  5.8120e-02,
         9.1782e-03,  3.1907e-02,  4.8291e-02,  9.0629e-02, -2.2828e-02,
        -4.1040e-02, -7.6314e-02,  5.1237e-02, -2.5925e-02, -3.9758e-02,
        -3.5077e-02,  9.5354e-02,  1.0890e-01,  9.5477e-03,  8.3483e-02,
         2.8936e-01,  1.1124e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6726e+00, -1.0729e+02, -2.2394e+00,  9.9495e-02, -1.6137e+00,
         1.4983e+00, -4.5876e-01,  7.8429e-01,  1.4949e+00,  8.3962e-01,
         5.9571e-02, -8.6969e-01, -2.7831e-01,  6.4369e-01,  5.1436e-02,
         1.2034e+00,  1.2960e-01, -1.7679e-01,  5.8211e-01,  3.8904e-01,
         3.0086e-01,  5.0198e-01,  7.7401e-01,  4.8013e-01, -1.0595e-01,
         2.2160e-01, -4.1142e-01,  1.2226e-01,  1.2465e+00,  4.9567e-01,
         3.5332e-01,  6.6745e-01, -2.3104e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3080e+00, -6.4098e+01, -3.3913e+00,  7.8604e-01, -5.1680e-02,
        -1.6187e-02,  2.4253e-01,  2.8753e-01,  1.7539e-01,  4.0015e-01,
         6.9733e-01, -4.1279e-01, -1.1972e-01,  5.9360e-01,  3.0222e-01,
        -7.1033e-01, -7.7254e-02,  9.0786e-01,  3.9132e-01,  4.9092e-01,
        -8.8307e-04,  1.1756e+00, -8.1975e-01,  8.7593e-02,  6.0051e-02,
        -4.7637e-01,  8.8236e-02,  4.0350e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3978e+00, -9.8597e+01, -1.8102e+00, -7.8337e-01, -1.3725e+00,
        -1.4730e-01, -9.9993e-01, -4.5426e-01,  1.4573e-01,  1.6391e-01,
         9.3955e-01, -9.1433e-01, -7.2109e-01, -4.9354e-01, -2.6811e-01,
        -9.5742e-02,  7.6934e-02,  6.1536e-01,  1.6344e-01, -6.6897e-01,
         5.1345e-01, -6.4224e-01,  1.2735e+00, -3.3919e-01, -4.0623e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-3.6931e+00, -8.8149e+01, -1.9206e-01, -2.0936e+00,  1.1482e-01,
        -1.6988e+00, -5.2989e-01,  3.1896e-01,  2.5319e-01, -5.5378e-02,
        -1.0586e+00, -8.5383e-01,  1.4631e-01,  4.1193e-01,  7.0239e-02,
         4.0958e+00,  4.1235e-01, -1.4726e-01,  6.4556e-02,  1.8133e-01,
        -4.7515e-01,  4.6287e-01,  4.6633e-01, -9.1147e-02, -3.0341e-01,
         1.0299e+00, -3.6814e-01,  4.7592e-02,  4.4768e-01,  7.9500e-01,
         4.4079e-01,  1.8713e-01,  4.7762e-01,  7.2602e-03,  1.3826e-01,
         6.5087e-01,  1.0471e-01, -2.0444e-01, -2.8894e-01, -2.3379e-01,
        -2.2693e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2870e+00,  5.5320e+01, -4.3092e-01, -1.7470e+00,  4.7874e-01,
        -2.3794e-01, -4.7143e-02,  8.7984e-02, -1.6265e-01, -4.6378e-02,
        -2.6146e-01,  3.2515e-01,  4.8506e-01, -7.2820e-02,  4.5611e-01,
        -3.6548e-01, -2.2753e-02, -3.6165e-01,  2.5651e-01, -1.2535e-01,
        -2.4148e-01, -4.4646e-01,  2.5386e-02, -7.1602e-01, -1.2300e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8416e+00,  4.3722e+01,  2.0478e+00, -2.5316e+00,  8.0310e-01,
        -3.0253e-01, -3.7870e-01,  2.6138e-01, -1.0488e-01,  1.7392e-01,
         2.0676e-02,  1.5077e-01, -2.0390e-01, -1.9824e-01, -2.7248e-01,
        -1.7829e-01, -4.3535e-01, -6.1295e-02, -5.9173e-01,  2.5059e+00,
         1.1096e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3947, 17.9035, -0.0617, -0.0272, -0.1508,  0.0364, -0.1249, -0.0550,
        -0.0200,  0.0377, -0.1130, -0.5656,  0.0449, -0.1348, -0.1651, -0.1990,
        -0.0296, -0.1455,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0439e-01,  1.3153e+01, -1.0092e-01, -3.1080e-01, -2.4370e-01,
        -1.6820e-02, -7.4666e-02,  1.6807e-03,  3.0260e-02,  5.6987e-02,
         8.6954e-02, -5.8927e-02,  2.7302e-02,  2.3769e-02, -3.2846e-02,
         1.2052e-01, -6.1484e-02,  3.5490e-02,  1.3743e-01,  3.8483e-03,
        -7.8532e-02, -3.7332e-02,  4.4407e-03, -3.5552e-02,  7.8948e-02,
        -4.5011e-02, -8.0204e-02,  1.0108e-01, -6.1634e-02, -6.5768e-02,
         5.9347e-02, -2.3253e-02,  1.1668e-02,  1.9523e-02, -3.0834e-02,
         1.1949e-01,  1.1378e-01,  1.5440e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4792e-02,  6.9073e+01, -2.0223e+00,  2.0303e+00,  1.3379e+00,
         4.9702e-01,  1.3545e-01,  1.4100e+00,  3.3113e-01, -3.1095e-02,
         1.6808e-01, -3.5734e-01, -2.7084e-01,  1.6457e+00, -1.5998e-01,
         1.2355e+00, -1.5970e-01, -3.5649e+00, -1.0277e+00,  2.5820e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0079e+00,  1.2623e+02, -9.9229e-01, -4.5980e+00,  1.8167e+00,
         1.8009e-01,  3.1642e-01,  4.5498e-01,  6.4908e-01, -4.0557e-01,
         7.4065e-02, -1.9034e+00,  6.8272e-01,  7.0652e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8900e-01, -7.7203e+01, -1.8662e-02,  4.5824e-01, -2.0729e+00,
         1.2532e-01,  1.6661e+00,  3.9707e-01, -3.7120e-01, -4.9086e-02,
         2.2433e-01, -8.2757e-01,  4.7518e-01,  5.5748e-03, -1.7534e-01,
        -1.7302e-01,  3.3545e-02, -8.4442e-01, -6.4225e-01, -1.4886e-01,
         1.3966e-01, -1.7131e+00, -9.1701e-01, -1.4961e+00, -5.5927e-01,
         2.5860e-01, -1.3148e-01,  8.8578e-02,  6.2129e-02, -1.3469e+00,
        -4.2336e-01,  2.4396e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5016e+00, -5.3614e+01,  1.2613e+00,  2.5220e-01,  2.2260e-01,
         8.8777e-01,  1.6092e-02, -4.0864e-02,  1.0189e-01,  5.7295e-02,
         1.8646e-01,  6.9621e-01,  1.1888e-02, -1.5063e-01, -9.8277e-01,
         7.6070e-02,  1.1308e-01, -6.0902e-01,  8.4011e-01,  2.8931e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7065e-01,  1.3924e+01,  3.1872e-01, -2.8572e-01,  1.6827e-01,
        -1.2758e-01, -9.7881e-03, -2.0912e-01, -8.3835e-02,  6.2059e-02,
         2.7569e-02,  1.2756e-01, -8.3856e-03,  7.2627e-02,  9.5242e-03,
        -1.0091e-01, -2.4143e-02, -2.7702e-03, -7.7104e-03, -1.0543e-01,
         8.2315e-02,  1.9362e-01,  7.7483e-02, -1.5109e-01,  2.6945e-02,
        -3.7393e-02,  7.9767e-02, -5.4632e-02,  1.6224e-02, -2.0765e-02,
         1.4218e-01,  8.7702e-02,  1.6577e-01,  1.1321e-01,  5.1622e-02,
         4.5356e-02, -2.5933e-02, -6.5809e-03, -4.3983e-02,  2.2010e-02,
        -1.2205e-01,  8.6512e-02,  1.6767e-02,  1.4660e-02,  3.5477e-02,
         3.7561e-02,  4.4255e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9481e-02,  7.4603e+01,  6.8130e-01,  6.4417e-01,  1.8475e+00,
         4.3441e-01,  5.8469e-01,  2.5288e-01, -4.4240e-01,  2.4862e-01,
        -1.5316e+00,  2.3595e-01, -2.4146e-01,  1.2576e+00, -1.7750e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7316e-01,  1.2033e+01,  4.5077e-03,  5.9260e-02,  1.1099e-01,
        -9.6107e-04, -9.5596e-02, -1.4273e-02, -9.3970e-02, -5.5815e-02,
        -4.1150e-02, -7.3810e-02,  1.8964e-02,  2.5468e-01, -1.6537e-02,
        -4.9645e-03, -6.1954e-03,  3.8242e-02,  4.2047e-02, -1.0495e-01,
         5.3532e-02, -4.2884e-02,  3.1199e-03,  2.2750e-02,  1.7141e-02,
        -6.4576e-02,  9.7049e-02,  2.3675e-01, -1.4958e-02,  4.4644e-02,
        -1.0001e-01, -4.0891e-02,  5.0254e-02,  4.6057e-02,  3.7947e-02,
         9.9004e-02,  3.3131e-02, -1.6878e-02, -7.3969e-02,  3.4184e-02,
         1.4854e-02,  1.8110e-02, -4.8713e-03,  3.1247e-02, -2.5639e-02,
        -1.0609e-02,  5.2066e-02,  4.0963e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 7.5112e-01,  9.0932e+01, -1.6713e+00, -4.3810e-01,  1.3870e+00,
         1.8496e+00,  3.8587e-01, -2.7766e-02, -1.5412e+00,  6.3134e-01,
         7.5156e-02, -1.7138e-01, -2.8207e-01, -4.1831e-02, -4.7923e-01,
         2.6850e-03, -1.1350e+00,  4.2945e-01, -2.7680e-01, -2.9973e-01,
        -2.4219e-01,  2.0352e-01, -4.3004e-01, -1.1299e-01, -9.8377e-01,
        -8.5448e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.0902, -40.0015,  -0.3708,  -0.9123,  -0.1198,   0.2114,  -0.1003,
         -0.4814,  -0.4298,  -0.1892,   1.0062,  -0.1351,   0.3069,  -0.0743,
          0.3173,  -0.1696,  -0.2831,   0.7429,   0.2085,  -0.2190,   1.1439,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0687e-01,  1.4393e+01,  2.7626e-01, -7.9672e-01, -5.4959e-02,
        -2.3744e-01, -5.9736e-03, -1.6575e-02, -3.2084e-02, -3.5253e-02,
        -1.1594e-01, -6.4323e-02, -2.7401e-01, -2.5151e-01,  3.3401e-02,
        -4.6652e-02, -1.1142e-02, -2.2985e-01, -8.7430e-02, -3.5355e-02,
        -3.0833e-02,  6.9219e-02,  8.2159e-02,  6.2262e-02,  3.4803e-02,
         2.8636e-02,  6.9907e-03,  1.8500e-02, -3.3256e-02, -2.2008e-02,
        -3.0484e-02,  2.6163e-02,  9.0798e-02, -3.7038e-02, -1.3732e-01,
         7.3646e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0582e+00,  8.6189e+01, -3.5364e+00,  1.8420e+00, -8.9136e-01,
        -1.1006e+00, -1.7110e+00,  5.7035e-01, -1.4821e-02, -8.0732e-01,
         2.2431e+00,  2.6260e-01, -2.4798e-01, -1.4624e+00, -2.5823e-01,
         1.0955e-01, -7.6241e-02, -7.0064e-01, -2.9948e-01, -3.5680e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4113e+00, -9.3249e+01, -1.8615e+00, -5.5734e-01, -3.2518e+00,
        -7.0187e-01,  4.7220e-01,  6.5144e-01,  4.3642e-01, -4.1923e-02,
        -8.0873e-01,  3.0567e-01,  9.7453e-01,  4.9996e-01, -1.5558e+00,
        -4.6723e-01, -1.0301e-02,  2.7752e-01,  1.4924e+00,  5.5882e-01,
        -6.9053e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.1751, -91.9320,   0.1564,  -1.2570,   0.6691,   0.3716,   0.1055,
          0.3920,  -1.0634,  -0.1446,   0.4001,  -0.6103,  -1.6319,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2576e-01,  5.1565e+01,  2.0345e-01, -7.8546e-01,  1.6469e-01,
        -3.9830e-01, -9.8602e-01, -6.3576e-01, -3.2300e-02,  1.0085e-01,
         3.0779e-01,  2.1904e-01, -4.6386e-01, -1.1746e-01,  3.5613e-01,
         4.1203e-01,  5.1895e-02,  9.9031e-02, -1.7669e-01, -2.3069e-02,
        -1.7026e-01,  1.6112e-01, -3.9948e-01, -9.2112e-02,  3.0684e-02,
        -1.2649e-01, -5.5880e-01,  2.4330e-01,  8.3713e-02,  1.0228e-01,
         1.8184e-01,  9.1192e-02,  3.3265e-01, -9.2802e-02, -4.8207e-02,
         1.2943e-01,  3.1353e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7553e-03,  4.6363e+00, -1.2011e-02, -4.4263e-03, -5.4790e-02,
         4.5561e-05,  1.8295e-02, -1.5643e-02, -4.4053e-02,  2.0386e-03,
        -3.3559e-02,  6.9843e-03,  2.8385e-02,  1.1785e-02,  1.1974e-02,
         3.4311e-02,  1.1305e-02,  3.3655e-02, -1.1252e-02, -4.7935e-02,
        -1.0705e-02,  3.8993e-02,  1.9019e-02,  3.9130e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3629e-01,  3.5296e+01,  5.1589e-01, -2.4261e-01,  1.3304e-01,
         2.8047e-01,  5.9102e-02,  8.1140e-01,  9.4700e-01, -2.3545e-01,
        -7.3466e-01,  5.9266e-01,  2.2745e-01,  8.8834e-04,  4.2409e-01,
        -3.6326e-01, -1.1763e-01,  1.0109e-01, -1.1201e-01,  2.0738e-01,
        -1.1056e-02, -2.3630e-01,  8.7528e-03,  6.3450e-02, -5.2873e-01,
         5.5576e-02,  4.3861e-02, -1.5503e-01,  2.0532e-01, -9.8129e-02,
        -9.9183e-01, -3.5817e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1103e+00,  1.0262e+02,  3.0826e+00, -2.1593e+00, -1.2577e+00,
        -5.8798e-02, -2.6282e-01, -5.4362e-01,  1.5040e+00, -8.9751e-01,
        -6.3695e-01, -2.4406e-01, -6.0552e-01, -3.2683e-01, -3.2130e-01,
         3.8812e-01, -4.5282e-01, -7.2543e-02,  1.5421e-01, -4.5509e-01,
        -4.2942e-01, -7.1125e-01,  7.0246e-01, -4.1883e-01,  3.9651e-01,
         5.2767e-01,  9.4722e-01,  1.6210e-01,  1.3220e+00, -9.1532e-01,
         5.0060e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5210e+00,  3.1672e+01,  1.5394e+00, -1.0206e+00,  1.0118e-01,
        -5.2981e-02,  4.3511e-02, -5.4704e-01,  1.8888e-01, -2.4346e-02,
         2.6014e-01,  7.0140e-02,  1.0492e-01,  1.1111e-01,  2.4915e-01,
         1.3559e-01,  3.0766e-01,  3.7515e-01,  2.2930e-01,  1.9363e-01,
         4.6263e-01, -1.0174e-02,  2.4382e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9375e-02,  4.2217e+01, -5.0321e-01, -8.4373e-01, -2.0642e-02,
         5.9622e-01, -3.8679e-01, -3.3358e-02,  5.4619e-01,  1.0704e-02,
         2.3856e-01,  4.2601e-01, -7.8722e-02, -7.1239e-01,  1.4412e-01,
         2.6783e-01, -1.0687e-01, -2.2907e-01,  8.0814e-02,  1.4705e-01,
        -2.1779e-01, -1.5424e-01,  2.5063e-01, -2.3446e-01, -1.2575e-01,
         7.0371e-02, -1.2532e-01,  5.4283e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 5.1128e+00, -7.2994e+01, -6.2509e-01, -6.5256e-01, -5.5508e-01,
        -1.2985e+00, -5.2153e-01,  2.6156e-01, -5.7070e-01,  2.9333e-01,
        -1.9648e-01,  5.8613e-01, -3.2540e-01, -2.0746e-01,  4.1804e-01,
        -3.3576e-01, -2.2233e-02, -6.8226e-01,  4.8365e-01, -7.0960e-02,
        -3.4447e-01, -1.0774e-01, -3.7535e-02,  9.2866e-02,  5.6245e-02,
         4.8732e-01,  1.6681e-01, -1.5302e+00, -1.4920e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4762e-01,  6.9456e+01, -6.9482e-01,  4.4949e-01,  6.2726e-01,
        -3.2578e-01, -9.5125e-01, -1.6697e-02, -5.8436e-01,  1.0950e+00,
         2.3703e-01, -1.4641e-01, -2.1379e-01,  2.4837e-02, -1.3254e-01,
         7.4444e-02,  1.5743e-01, -3.5228e-02,  4.4948e-01, -3.0504e-01,
        -6.1319e-01,  1.1151e-01, -3.1092e-01,  9.3174e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9582e+00,  4.5590e+01,  1.5764e+00, -1.1239e+00,  1.6589e-01,
         6.0008e-01,  1.5072e-01,  2.9116e-01,  7.2482e-01,  7.6736e-02,
         1.2666e-01,  2.6079e-01,  5.2296e-01,  1.6557e-01,  5.6345e-01,
         1.1690e-01, -7.7172e-02,  6.0002e-02,  1.3263e-01,  3.0092e-01,
        -3.4907e-02,  2.1292e-01,  1.2922e-01,  3.4667e-01,  6.7441e-01,
         2.7567e-01,  3.0883e-02,  3.1514e-01,  1.1034e-01,  1.3502e-01,
         3.9508e-02, -8.2931e-01,  7.7804e-01, -1.8130e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5567e+00,  1.0118e+02,  1.4569e+00, -3.3315e-01,  3.8791e-01,
        -5.3679e-01, -7.4053e-02,  2.5492e-01,  4.7651e-01,  5.3163e-01,
        -9.3624e-01, -1.2264e+00, -1.3633e+00, -3.9622e-01, -1.5741e-01,
        -1.1840e+00,  8.5018e-02,  8.2324e-02, -9.0899e-01,  2.1190e+00,
        -5.0548e-01, -3.8435e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8981e+00, -7.6040e+01, -2.0967e+00, -9.0433e-02, -8.5126e-01,
         2.9547e-01, -2.4475e-01,  1.8721e+00,  5.0532e-01,  1.4258e-02,
        -1.3552e+00,  6.5907e-01,  1.7303e-01,  1.4706e-01,  1.9624e-01,
         6.1871e-01,  7.1443e-01,  2.0343e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5174e-01,  2.7961e+01,  1.3562e-01, -1.2014e-01,  6.9324e-03,
         6.1559e-02,  7.5773e-02,  4.8833e-01,  2.9122e-01,  3.3072e-01,
        -1.1369e-01,  1.8778e-01, -2.9847e-01, -5.0626e-01, -5.5977e-02,
         1.1837e-01,  2.0018e-02, -2.5928e-01, -4.2261e-01,  9.2792e-02,
        -1.1723e-02, -2.9455e-01, -2.0304e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3138e-01, -5.9056e+01, -9.3389e-01, -7.9077e-01,  9.1707e-01,
        -2.2514e-01, -2.4944e-01,  3.9594e-01, -1.5972e-01,  2.9810e-01,
         3.0376e-02,  2.8075e-01,  3.0165e-01, -4.6418e-01, -1.9943e-01,
        -3.1734e-01, -4.5331e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6137e+00,  2.5501e+01,  4.8571e-01,  1.1829e-03,  1.8228e-01,
         1.6209e-01,  9.3610e-01, -2.5128e-01,  1.7519e-01,  1.5194e-01,
         1.5001e-01, -1.5118e-01,  3.2944e-01, -3.8410e-02, -1.8702e-01,
         6.3477e-01,  6.4987e-01,  1.1432e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1377e+00,  6.0863e+01, -1.1782e+00,  6.5979e-01, -1.2387e+00,
        -4.0183e-01, -2.6584e-01, -3.5526e-01, -6.0017e-01,  5.0521e-01,
        -1.4562e-01, -3.8655e-01,  9.7153e-01,  8.2918e-02,  6.8790e-02,
        -1.3488e+00,  5.3904e-02, -5.3404e-01,  3.0931e-01,  2.8337e-01,
        -2.3668e-01, -2.7098e-01,  2.0570e-01, -3.7536e-01, -1.2891e+00,
         1.0646e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7942e-02,  1.0887e+01,  8.8442e-02, -1.8807e-01,  1.2115e-04,
         1.4621e-01, -3.1703e-01,  4.6151e-02,  7.6269e-02, -1.3713e-01,
        -9.3878e-02, -1.0403e-01, -3.1227e-02,  8.4960e-02, -9.7374e-02,
         4.2721e-02, -1.2187e-01, -3.6686e-02, -1.4790e-02, -1.5694e-01,
         2.1356e-02,  1.5863e-02,  1.6384e-02, -8.0099e-02,  6.3953e-02,
        -8.8404e-03, -1.0357e-01,  7.9155e-03, -1.9200e-02, -1.0090e-01,
        -4.4758e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   5.6048, -127.2803,    0.6441,    1.1972,    0.1597,    1.1162,
          -0.3647,   -3.4009,   -0.8986,    2.6418,   -0.4691,    0.2553,
           0.8044,    0.9514,    3.1631,    0.6877,    1.7875,    1.5099,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1318, 12.5075, -0.7719, -0.4568,  0.1043, -0.1095, -0.3069,  0.3224,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.1550e-01,  2.6655e+00,  2.5161e-02, -6.5344e-02,  1.1384e-02,
         3.8186e-02, -1.7726e-03, -3.0949e-04, -1.8295e-02,  2.3734e-02,
         1.0405e-03, -3.1832e-02,  8.6135e-03,  5.9091e-03,  1.0591e-02,
         3.7930e-03,  3.9449e-04,  8.6273e-03,  3.4129e-03,  1.3973e-02,
         2.7229e-04,  6.1420e-03, -3.4757e-03, -1.5640e-03,  8.2053e-03,
         8.5567e-03, -7.5206e-03,  7.6411e-03, -1.1191e-02,  3.7637e-03,
        -3.8962e-03,  7.3091e-03,  4.5896e-03,  1.0167e-02, -7.6970e-03,
        -2.2963e-02, -8.1036e-03,  1.9507e-04,  2.6013e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7983e+00, -6.1736e+01,  5.9047e+00,  8.7064e-01, -6.5161e-02,
         2.2639e-01,  4.3904e-01,  4.1153e-01,  7.4158e-01,  8.6124e-01,
        -1.7858e-02, -1.8628e-01, -7.7363e-01,  3.7767e-01,  1.6663e-01,
         9.1888e-02,  1.2076e-01,  7.2415e-01,  3.1898e-01,  1.5608e-01,
        -1.7231e-01, -6.4457e-01,  3.1467e-01,  3.4367e-01, -1.6835e-01,
        -8.6234e-02, -2.1763e-01,  4.9667e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5372e+00, -2.2931e+01,  6.9148e-02,  1.3157e+00,  2.4000e-01,
        -1.6963e-02,  3.7193e-01,  1.9658e-01, -1.0579e-01,  9.3729e-02,
        -1.0437e-02, -2.1859e-01, -5.0693e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  3.2429, 121.2060,  -1.9992,   1.4375,  -0.2441,   1.5524,  -2.1158,
          0.9754,   0.2635,  -1.6000,   0.2346,  -0.6785,  -0.5186,   1.7371,
         -5.6035,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2445e-01, -9.5968e+01,  2.2041e-01,  1.9192e-01, -2.2194e+00,
         1.5413e+00, -6.2286e-01,  1.1204e+00, -8.4247e-01,  1.0396e+00,
         1.2857e+00, -3.0224e-01, -6.0238e-01, -5.6738e-02,  2.8788e-01,
        -3.0815e-01,  1.7829e-01,  1.8379e+00,  5.9080e-01,  4.3948e-01,
         1.4290e-01,  1.5440e-01,  4.9610e-01,  3.4660e-01,  6.1719e-01,
        -5.0563e-02,  1.7114e-01, -5.6098e-01,  4.0667e-01,  3.2776e-01,
         1.8065e-01, -1.2384e-02, -3.2611e-01, -7.2618e-01, -5.1712e-01,
        -4.3814e-01,  4.5355e-01,  2.2114e-01,  3.8915e-01,  1.4245e-01,
        -4.7354e-01,  3.6351e-01,  1.5083e-01,  3.3127e-01, -7.5464e-01,
        -5.8753e-01,  3.1173e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4239e+00, -7.5399e+01, -1.3118e+00, -8.9088e-01, -1.1149e-02,
         1.1201e-01,  4.7851e-01, -9.5889e-03, -4.1625e-02,  3.6560e-02,
         1.5045e-01, -1.8363e-01, -6.7473e-02,  2.0422e-02, -3.3745e-01,
        -1.1442e-01, -5.3136e-01,  7.5297e-02,  1.5716e-01, -8.3438e-02,
        -3.2437e-02, -1.8551e-02,  3.5381e-02, -2.6210e-02, -1.4165e-01,
         2.3762e-01, -1.0857e+00,  1.0550e-01,  1.7523e-01, -3.6228e-01,
         7.9941e-02,  3.7585e-01,  5.6254e-02,  5.5861e-02, -3.6220e-01,
         5.9347e-02, -1.2900e-01, -3.0639e-01, -2.7185e-01,  6.0700e-01,
        -2.0471e-01,  2.2635e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5916e+00,  6.9676e+01,  1.3766e+00,  2.3888e-01,  5.6970e-01,
         1.3270e+00, -4.7187e-01, -3.7013e-01,  3.7656e-01,  5.2337e-01,
         7.6005e-01,  8.5325e-01, -4.2613e-02, -1.0254e-02,  1.0971e+00,
        -5.4296e-01,  3.7514e+00,  8.1510e-01, -1.7153e-01,  1.7064e-01,
        -8.7941e-01, -5.5376e-01,  1.4042e-01, -3.8854e-01, -4.6382e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3004e-02,  1.2823e+00, -3.8762e-03,  4.1662e-03, -5.0555e-03,
        -1.1368e-02, -1.9424e-02, -4.2721e-03,  8.3363e-04, -3.2129e-03,
         2.9914e-03,  1.9308e-02, -1.0292e-03,  8.6038e-03, -6.1412e-03,
         3.8877e-03,  4.3179e-03, -6.2853e-03,  7.2645e-03,  1.4050e-02,
         1.9959e-03,  2.3253e-02, -8.7806e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0846e+00, -1.0505e+02, -9.8013e+00, -3.7119e-01,  1.3100e+00,
         1.0336e+00,  8.2107e-02, -8.7478e-02,  1.0617e+00,  3.3143e-01,
         8.4273e-01,  2.1328e-01,  1.2934e+00,  8.0544e-01,  8.3635e-02,
         6.7627e-01, -6.5842e-01, -2.4545e-01,  1.6948e-01,  6.1500e-01,
         4.3470e-01,  1.6826e+00,  1.9495e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3952,  4.4632, -0.2459, -0.1167, -0.0711, -0.0051,  0.1388, -0.0292,
         0.0312, -0.0208,  0.0925, -0.2436, -0.0611,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.4084, -14.3369,   0.5695,   0.1313,   0.2493,  -0.0414,   0.2354,
         -0.3616,  -0.2695,  -0.0832,   0.0215,  -0.2257,   0.2157,   0.3935,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5853, 77.2137,  1.2056, -0.9576, -0.4257,  0.2299, -0.0971, -0.1978,
        -0.3354, -0.1059, -0.2841,  0.2969,  0.2054, -0.3483,  0.2190,  0.1835,
         0.6115, -0.4803,  0.1406,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.1628e-01,  5.1229e+00,  5.7397e-02, -4.5708e-02,  9.1017e-03,
        -9.7253e-03, -5.4613e-03, -3.3848e-02,  3.4282e-02,  2.5703e-02,
        -2.4023e-02, -3.4101e-02, -5.8023e-03,  1.3133e-02, -3.0462e-03,
        -1.8884e-01,  3.4813e-02,  9.5977e-03,  1.3014e-02,  7.5444e-02,
        -3.5248e-02, -3.7428e-03, -1.3502e-02,  1.3556e-02, -1.0570e-02,
        -1.3300e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5142e+00,  1.0719e+02, -2.2926e+00,  1.1236e+00, -1.7905e+00,
        -4.0607e-01,  8.8648e-02, -1.0971e+00, -1.4570e-01,  4.7405e-01,
         4.2107e-01, -1.6068e-01, -5.9245e-01, -7.8984e-01,  3.5713e-01,
         5.5127e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7204, 47.1727,  1.5757, -0.5978, -0.5184, -0.1474, -0.3097, -0.0912,
         1.2592, -0.1722, -0.5373, -0.1330,  0.2813,  0.1151, -0.3447, -0.5791,
         1.1638,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0374e+00,  7.9000e+01,  6.7620e-01, -6.8896e-01,  3.5995e+00,
        -1.0686e-01,  5.5530e-02,  4.8643e+00, -4.3436e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9472e-01,  9.0658e+00,  3.5702e-01, -9.3401e-02, -3.5010e-01,
         1.6231e-01,  2.8530e-02,  9.4930e-02,  2.5638e-02,  4.1607e-02,
         1.2908e-01, -6.2115e-02,  6.7701e-02, -5.6928e-02,  6.1208e-03,
         8.2421e-02, -1.0710e-01,  5.4404e-02,  9.7677e-02,  6.1074e-02,
         6.5329e-02, -5.6279e-03,  3.3541e-02, -2.8389e-02,  5.6725e-02,
        -3.3357e-02,  3.2916e-02,  3.6264e-02,  4.2637e-02,  8.8169e-03,
         5.5740e-02,  2.6215e-02,  2.2725e-02,  9.6108e-03,  2.9036e-02,
         1.9248e-02,  2.0754e-02,  5.5591e-02, -6.3429e-02, -9.6128e-03,
        -5.3641e-02, -1.3095e-02, -4.9033e-02, -2.0363e-02, -1.4232e-02,
        -3.5311e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3138e-01,  3.0649e+01,  2.9873e-01, -1.3174e+00,  1.2057e-01,
        -1.7753e-01,  1.2462e-02, -1.7772e-01,  7.8691e-02,  2.6642e-01,
        -4.6104e-02, -2.3319e-02,  5.3170e-01, -2.1275e-02,  4.2620e-04,
         2.0373e-01,  2.2220e-01,  1.3635e-01,  6.1496e-02,  3.2182e-01,
        -7.0353e-01,  8.4878e-03,  1.1033e-01,  1.0672e-02,  7.7062e-02,
        -8.6245e-02, -2.2367e-01, -2.0915e-01, -1.1411e-01,  4.2890e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.4454, -17.3697,   0.6388,   0.4955,   0.5591,  -0.3494,  -0.0385,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7840e-02, -6.8450e+01,  6.3293e-01,  6.4125e-01,  1.7932e-01,
         4.8655e-01,  1.0127e-02, -2.0886e+00,  3.8256e-01, -2.4857e+00,
        -4.7927e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6010e+00, -1.0634e+02, -3.0734e+00, -5.5070e+00, -5.3063e-01,
        -3.2570e+00, -1.7925e+00, -4.3158e-01, -1.3514e+00, -3.0167e-02,
        -1.3308e-01, -8.8845e-01, -5.7848e-01,  5.6857e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8379e-01, -1.1058e+01,  5.9070e-02, -2.6598e-03,  1.3048e-01,
         1.2565e-01,  7.5840e-02,  1.0519e-01,  1.4493e-01,  3.4381e-02,
        -2.2806e-02, -2.9769e-02,  1.0598e-01,  1.4955e-03,  1.9920e-02,
         3.0564e-02, -3.5126e-02,  3.1149e-02, -1.0581e-02, -7.0482e-02,
        -4.9708e-02, -1.1099e-01,  4.1091e-02,  1.5111e-02,  3.8225e-02,
         8.2149e-02, -4.7666e-02, -1.6873e-02,  1.0404e-02, -1.3691e-01,
        -2.4603e-02, -8.6817e-02,  2.6166e-02, -2.4997e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4930e+00, -9.8811e+01, -6.0565e+00,  1.4911e+00, -1.1583e+00,
         5.4282e-01, -1.4338e+00,  5.9243e-01,  4.9504e-01, -9.7946e-01,
         1.0928e+00,  6.9142e-01,  2.4496e-01,  7.3549e-01,  1.2176e+00,
        -8.4642e-02, -3.5716e-01, -5.5381e-01,  6.1208e-01,  9.3942e-01,
        -7.1539e-01, -2.3453e-01,  7.2125e-01, -2.9748e-01, -3.6252e-01,
         3.2803e-01, -6.7618e-01,  1.0163e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3450e+00,  5.8068e+01, -4.9803e-02, -4.0800e-01,  4.4440e-01,
        -3.7585e-01, -3.1664e-01,  2.9362e-01,  2.2125e-01,  7.2620e-02,
        -9.1489e-03, -3.3572e-01,  9.6186e-03,  3.6257e-01,  4.0165e-01,
        -4.6969e-01, -2.3614e-01,  1.0447e+00,  3.5740e-01, -2.6799e-02,
        -2.7399e-01, -1.5041e-01, -4.5208e-02, -8.1494e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-3.7820e-02,  1.3190e+01,  2.8203e-01,  7.6335e-02, -5.6602e-02,
         1.7800e-02,  8.3021e-02,  2.6706e-02,  6.1471e-02, -4.0153e-03,
        -3.7817e-01,  3.2194e-02,  7.1233e-02,  1.1245e-01,  5.1880e-02,
        -8.3035e-02, -1.1046e-02, -3.3633e-02,  4.0187e-02,  3.1062e-01,
         4.2045e-02,  1.0309e-01, -1.0371e-01,  3.7028e-02,  2.9356e-02,
         6.6586e-03,  2.4449e-02,  1.0668e-02,  6.0284e-02, -3.1600e-01,
        -1.5270e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4204, 45.5837, -0.4875,  0.4865,  0.6255, -0.0868,  0.4636, -0.2703,
         0.2951,  0.3146, -0.6500, -1.4493,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0391e+00,  3.2542e+01,  3.7934e-03, -8.4714e-01,  1.8920e-01,
        -4.1041e-02,  8.9253e-02, -1.4381e-01,  2.6030e-02, -3.8586e-01,
         1.7400e-01, -4.6522e-01,  1.7457e-01,  1.4975e-01, -7.6326e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8595e+00,  4.2797e+01,  2.7345e-01, -5.5239e-01,  6.4877e-01,
         1.5478e-01,  7.3126e-01, -8.6582e-01,  2.8268e-01,  5.0290e-01,
        -1.3082e-02, -1.2151e-01, -4.3703e-02, -3.0936e-01,  3.7816e-01,
         4.4659e-02, -2.4059e-01, -5.2600e-01,  1.8124e-01, -3.6164e-01,
        -4.3600e-02, -7.3776e-02, -9.4983e-02, -1.4544e-02,  1.1200e-01,
         4.3910e-01,  1.4278e-02, -1.5040e-01, -1.0383e-01,  6.9161e-02,
         2.5854e-02, -3.2324e-01, -7.1402e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2530e+00,  9.8982e+01,  7.5426e-01, -2.1950e+00,  2.0513e-01,
         2.8718e-02, -6.0343e-01, -1.7589e+00, -1.2682e+00, -3.1386e-01,
         1.6020e-02, -6.7803e-01, -1.7788e-02, -1.8194e+00,  7.7824e-02,
        -1.0005e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0474, 11.3343, -0.2631, -0.2459,  0.1702, -0.1455, -0.3228,  0.0793,
        -0.3828,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7994, 23.7138, -2.0826,  0.3184, -0.7727,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2803e+00, -1.4312e+02, -1.5738e+00, -2.2929e+00,  9.4993e-01,
        -4.6943e-01,  1.5320e+00,  2.0687e+00, -1.1112e+00,  1.0472e-01,
        -1.9401e+00,  1.3113e-01, -1.1433e+00,  8.7984e-01,  1.0371e-01,
        -4.5532e-01, -6.0327e-01, -2.8247e-01,  6.2155e-01, -5.6206e-01,
        -3.3469e-01, -1.6522e-01, -2.5914e-01,  2.6132e-01, -1.8693e-01,
         5.0934e-01,  7.3135e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4806, 41.6806, -0.0937,  0.0935, -0.2694, -0.2799,  0.0426,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1645e-01,  4.2136e+01,  5.1045e-01, -6.5830e-01, -7.1370e-01,
         3.3089e-02,  1.2176e-01,  4.2957e-02, -3.3294e-01, -2.2519e-01,
         3.3584e-01,  3.9827e-01, -4.6667e-01, -2.1142e-01, -1.4043e-01,
        -2.5876e-01, -2.4729e-01, -2.4577e-01, -3.2714e-01, -4.0429e-01,
         2.4121e-01,  2.9356e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.9184, -21.0005,   0.2128,  -0.5271,  -0.1427,  -0.2259,   0.1545,
         -0.1060,   0.1295,   0.1177,  -0.2018,  -0.0611,  -0.0281,  -0.3692,
          0.1826,   0.0268,   0.1592,  -0.0849,  -0.4327,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6590e+00, -4.6703e+01, -1.6450e+00, -5.7083e-01,  7.2562e-02,
        -1.0345e+00, -3.9067e-01, -2.1290e-01, -3.0730e-01, -4.3944e-02,
        -3.1100e-01,  6.5567e-02, -4.9335e-02, -2.7775e-01,  1.3958e-01,
        -1.2732e-01, -2.8372e-01, -2.6642e-01, -2.5669e-01, -1.0749e-01,
        -1.2414e-01, -2.9371e-01, -1.7677e-01, -7.3353e-03,  4.6963e-02,
        -6.8785e-02, -8.0275e-02,  1.3074e-02, -3.7813e-01,  3.4133e-01,
         4.1241e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 2.2739e+00, -1.1401e+02, -1.1466e+01,  2.7291e+00,  3.5599e-01,
        -8.5682e-02,  1.4751e+00,  6.8282e-01,  1.0239e+00,  1.7226e-01,
         2.4745e-02,  7.6241e-01, -2.3565e-01,  4.8109e-01, -4.3237e-01,
         1.2585e-01,  6.5737e-01, -8.6720e-01, -1.2685e+00, -1.1146e-01,
         2.8117e-01,  6.2814e-01,  2.1899e-01,  2.3966e+00, -3.1319e-01,
         4.1478e-02, -2.7323e-01,  3.3797e+00,  7.1399e-01, -5.8451e-01,
        -2.6073e-01, -9.0820e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8759e+00,  1.2778e+02,  3.1372e-01,  3.6126e-01,  7.2369e-02,
         9.4163e-01, -4.1956e-01, -2.9587e-01,  8.1083e+00, -1.9186e-01,
        -4.1859e-01,  1.5567e-01, -3.6982e-01, -6.0176e-01, -1.9091e-01,
         5.4581e-01,  7.2909e-01, -7.9013e-01, -1.3019e+00,  2.0594e-01,
         3.2349e-01,  1.0193e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2185e-01,  2.7646e+01, -4.9089e-01, -2.6439e-01, -2.3914e-01,
        -5.1094e-02, -1.7005e-01,  2.6834e-01, -4.3286e-02, -6.8453e-02,
         2.2265e-01,  1.4047e-03,  3.8020e-01,  1.8674e-02, -2.1930e-01,
         3.4475e-02,  5.2008e-02,  3.2621e-01,  4.9631e-02, -5.2322e-01,
         1.0283e-02, -2.0916e-01,  1.0506e-01, -1.4012e-01, -1.3908e-01,
        -1.2839e-01, -5.0668e-02,  4.2529e-01, -1.0949e-01,  1.6176e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5281e-02,  5.8804e+00, -9.6514e-02, -7.5746e-02,  5.5917e-02,
         1.2444e-03, -8.2028e-03, -4.1489e-02, -2.5841e-02,  4.8910e-03,
         1.7097e-03,  5.2935e-02, -1.0149e-02,  9.0150e-03, -2.8352e-02,
        -3.0337e-02,  3.3952e-02,  1.2891e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1833e+00,  9.3488e+01,  1.6465e+00, -1.7134e-01, -2.2175e+00,
        -9.1198e-01, -8.2582e-01,  8.5303e-01,  3.0580e-01,  6.2596e-02,
         1.3260e-01,  7.0488e-02,  9.2387e-02,  5.8379e-01, -4.6371e-01,
        -6.5684e-01,  2.9805e-01,  9.0471e-02, -1.4149e+00,  7.9643e-02,
         2.6495e-01, -2.9262e-01,  1.9119e-01,  8.4086e-01, -1.9995e-01,
        -1.1689e-01,  2.7769e-01,  1.6358e-01,  6.7974e-02,  1.5282e-01,
        -1.3550e-01,  3.0087e-01,  4.3461e-01,  3.1749e-01,  6.6284e-01,
        -2.2628e+00, -9.2502e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6234, 24.3340, -2.2458, -0.1306,  0.1386,  0.4174, -0.0438,  0.5477,
         0.0376,  0.0276, -0.0323, -0.0339,  0.1162,  0.1675, -0.0665,  0.0561,
        -0.1473,  0.1264,  0.0746,  0.0629,  0.4229, -0.0259,  0.3581,  0.3877,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1749e+00, -7.7068e+01, -2.9972e+00, -1.6275e-01, -1.0440e+00,
        -5.9110e-02,  4.2988e-01, -4.5049e-01, -8.4136e-01,  1.7274e-01,
        -1.0224e-01,  4.4451e-01,  4.3352e-02, -1.4126e-01,  1.6611e-01,
        -1.8482e-01,  3.0555e-01,  1.1133e-01,  1.8773e-01, -2.0601e-01,
        -1.9518e-01,  1.5635e-01,  2.6914e-01, -7.0852e-02, -4.5999e-01,
        -3.3631e-01,  2.5051e-01, -3.2551e-01, -4.4360e-01, -1.3964e-02,
         3.2263e-01,  6.3173e-02,  7.8201e-02, -1.9756e-01,  3.8637e-02,
        -7.8339e-01,  6.4167e-02,  4.0234e-01,  6.9146e-02,  1.2568e-01,
         4.1392e-02, -2.0809e-01, -6.6111e-02, -6.8869e-01, -6.3939e-01,
        -2.0619e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4516e-02,  1.0742e+01, -2.1116e-02,  7.4405e-02, -3.3529e-01,
        -1.3896e-01, -2.0688e-02, -9.5491e-02,  2.8953e-02, -1.1989e-01,
         5.8695e-02, -4.7954e-02,  6.0208e-02, -3.9936e-02,  1.8584e-02,
         5.6561e-02,  1.7301e-01, -4.6981e-02,  1.4509e-03, -7.9926e-04,
         1.3791e-01, -2.9975e-01, -4.4971e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.2770, -45.3742,   2.4749,  -0.9488,  -1.0147,   0.2888,   0.0685,
         -0.6398,  -0.2206,   0.1195,   1.4002,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8319, 47.9300, -1.5629, -1.1943,  0.3027, -0.2053,  0.4902, -0.0620,
         0.8210,  0.5127,  0.9868,  0.2880,  1.1624, -0.2470,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3477e-02,  5.3978e+00, -1.2503e-02,  1.6459e-02,  3.5165e-03,
        -1.7622e-02, -1.4482e-01, -5.5746e-03,  3.9533e-02, -1.8969e-02,
         1.5499e-01,  1.6522e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1181e+00, -1.2204e+02, -5.8970e-01, -3.8908e+00,  1.1691e+00,
         2.2606e-03,  3.0089e-01,  7.2704e-02, -1.6474e-02, -5.1657e-01,
         4.2411e-01, -5.0360e-01, -2.8953e-01, -2.9549e-01,  2.1560e-01,
        -1.5620e-01,  1.6139e-01,  1.6677e-01, -6.5496e-02,  8.1277e-01,
         2.1903e-01, -3.3406e-02, -6.8037e-01,  4.7680e-01,  4.2332e-01,
         6.9853e-01, -7.1834e-01, -1.5012e-02,  4.2885e-01, -3.9616e-01,
        -2.1081e-01, -3.2108e-01, -9.2158e-02, -1.5701e-01, -2.2233e-01,
        -3.7810e-02,  1.8167e-01,  2.1674e-01,  2.9253e-01,  5.0014e-02,
        -1.3635e-02,  2.8124e-02, -1.3701e-01, -9.5310e-02,  2.5490e-01,
         3.7335e-01, -5.3423e-01,  4.5592e-01,  2.3666e-01,  3.8174e-01,
         2.1090e-03,  1.3904e-01,  4.0362e-01, -5.0463e-01,  1.5199e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-2.7494e-02, -4.6818e+01, -9.9093e-01,  4.0600e-01, -1.6530e-01,
        -1.4626e-01,  2.5045e-01,  4.1279e-01,  4.4014e-01, -4.1603e-01,
        -1.4746e-01,  1.1964e-03, -6.3720e-01, -7.3729e-02, -9.1639e-02,
        -3.8506e-01,  1.0956e-01, -9.4435e-01, -1.7761e-01, -2.7296e-01,
         1.3867e-02, -2.9826e-02, -3.0956e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0656e+00,  1.0368e+02, -5.8578e+00,  1.1188e-01, -7.1156e-01,
        -3.7622e-01, -1.4210e-01, -1.8222e-01,  2.9766e+00, -2.3289e+00,
        -7.8401e-01, -1.2374e+00, -5.7495e-01,  1.9499e+00, -1.0541e+00,
         3.9170e-02, -9.6240e-01,  1.3606e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7855e+00,  2.4250e+01,  2.2623e+00,  8.5319e-01,  4.1762e-01,
        -6.0793e-01, -3.6710e-02,  3.2216e-01, -5.5047e-02,  1.5706e-02,
        -1.4409e-01,  6.2427e-02,  2.9185e-01, -2.5460e-02, -2.1624e-01,
        -2.0901e-01, -2.7450e-01,  5.7160e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8261e-01,  1.2718e+02, -3.0037e+00, -1.4402e+00, -9.1644e-01,
         1.4441e+00, -2.8891e-01, -2.4417e-01,  5.3925e-01,  1.8034e+00,
         8.4865e-02, -1.4126e+00,  1.6800e-01,  1.1635e+00, -3.2777e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2054e-01,  3.5452e+01,  5.6895e-02, -6.6929e-01,  3.1869e-01,
        -2.2250e-01,  4.8770e-01, -4.2505e-02,  2.7396e-01,  4.2511e-01,
         1.2441e-02, -4.8408e-02,  2.0793e-01,  1.3155e-01,  1.1298e-01,
         4.4866e-01, -1.0751e-01,  9.5751e-02,  1.7045e-01,  5.5493e-01,
         6.8153e-02, -8.9486e-02, -8.9862e-02, -7.3223e-03,  8.3280e-02,
         6.5741e-02,  4.7525e-01, -5.6088e-02,  9.7346e-02,  4.5714e-01,
        -7.6176e-02,  2.4135e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3829e+00, -3.4853e+01,  1.9177e-01, -2.5697e-01, -7.1681e-01,
        -6.2067e-02,  6.3653e-02,  6.5415e-02,  6.7891e-01, -3.1175e-01,
         1.2151e-02,  1.1961e-01,  3.2026e-02,  1.2844e-01,  5.1242e-03,
         5.6084e-01,  3.5679e-02,  3.4225e-02, -1.3444e-01, -4.6997e-02,
         7.0832e-02,  4.7438e-01,  2.9032e-01,  4.3579e-02, -7.4454e-02,
        -6.6592e-02, -5.3555e-02, -3.6278e-01,  1.0182e-01,  3.5298e-02,
         3.9871e-01, -1.7790e-01, -9.6663e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1812e-01, -3.2277e+01,  2.6618e-01, -1.3454e+00, -5.7062e-01,
         1.7194e-01,  2.4273e-01,  2.9684e-01,  2.2145e-01, -3.8292e-01,
        -7.4523e-02, -1.5102e-01, -1.2973e-01,  1.6235e-01, -1.0453e-01,
         1.9257e-01,  4.4799e-02,  1.6379e-01, -5.7553e-02,  4.9133e-02,
         2.7759e-02,  8.1508e-02,  3.7146e-02,  5.8362e-02, -3.2244e-02,
         3.4724e-02, -9.0956e-02,  1.9947e-02,  1.5793e-02,  8.9926e-03,
        -1.3157e-01, -1.2724e-01,  3.2033e-02,  2.3139e-01,  7.2540e-02,
         1.7732e-01,  4.3126e-02,  1.0925e-01,  4.1531e-02, -2.6372e-01,
         3.7737e-02,  6.1233e-02,  5.7603e-02,  3.6746e-02, -1.3122e-02,
        -1.0383e-01,  2.3851e-02,  2.5123e-03, -4.5904e-02,  4.7230e-03,
         1.2840e-01, -1.0470e-01, -1.3584e-01, -2.2823e-02, -8.1815e-02,
         1.2180e-01, -1.7626e-01, -3.3672e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7410e+00,  6.0163e+01, -4.3115e-01, -2.2732e+00, -6.2925e-01,
        -8.8469e-01, -2.2905e-01, -1.9439e-01, -5.9953e-01,  5.4538e-01,
        -1.6725e-02, -1.7907e-01, -5.8455e-01, -1.1802e+00,  3.2952e-02,
        -9.3621e-02, -1.8802e-01, -1.1092e-01, -3.7219e-01,  1.1654e-01,
         4.2744e-01, -4.5041e-01, -3.4782e-01, -5.7816e-01, -8.9187e-01,
        -2.0006e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1534e-02,  1.4997e+02, -3.6919e+00, -1.6897e+00, -3.8358e-01,
         2.8161e+00, -3.8308e-01, -2.9976e-01, -1.6399e+00, -9.7108e-01,
        -6.0237e-01,  2.6574e-01,  1.2660e+00,  1.2056e+00, -2.5400e-01,
         1.3478e-01,  5.5591e-01, -5.1082e-01,  2.3456e-01, -2.0609e-01,
        -1.4194e-01, -1.5004e+00, -2.1977e+00, -1.4374e-01,  5.8405e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3057e+00, -1.0089e+02, -4.1521e+00, -7.8780e-01, -2.2957e-01,
        -4.1490e-01, -5.5314e-01, -1.0090e+00, -4.7735e-01, -9.9513e-01,
         1.7135e-01,  1.9033e-01, -4.7804e-01, -2.0620e+00,  1.3827e+00,
        -1.0712e+00,  8.3417e-01,  7.9777e-02, -5.2794e-01, -3.8465e-01,
         6.1673e-02,  6.5423e-01, -3.8645e-01,  3.1180e-01,  3.9356e-01,
         5.5290e-01, -1.1593e+00, -5.8435e-02,  3.1576e-01,  3.3363e-01,
        -5.6519e-01,  2.4558e-01, -3.0135e-01, -5.4125e-01,  4.9269e-01,
        -2.1100e-02,  2.1392e-01,  2.7664e-01,  4.3259e-01,  8.0491e-02,
         7.2587e-01,  2.0757e-01,  2.5729e-02,  9.7571e-02,  7.4852e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8447e-01,  3.9144e+00,  1.0607e-01,  8.1895e-02,  3.1484e-02,
         4.6030e-03,  2.1260e-02,  3.4260e-02, -4.1016e-02,  3.2699e-03,
         2.4304e-02, -4.2861e-02, -4.1311e-02,  1.9695e-02,  9.7536e-03,
         3.0479e-02, -6.4794e-02,  9.8630e-03,  2.9805e-02, -1.9643e-03,
         3.3604e-02, -1.1273e-01, -8.8256e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0886,  4.0660,  0.0198, -0.1843,  0.0361,  0.0825,  0.0756,  0.0107,
        -0.0115, -0.0756,  0.0310,  0.0148,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.4928e+00,  8.0014e+01, -2.4876e+00, -1.3911e+00, -9.5768e-02,
         9.8286e-01, -4.8819e-01, -5.9230e-01,  8.5343e-01, -2.7306e-01,
        -6.4422e-01, -4.1677e-01, -5.4514e-01,  8.9577e-02, -5.1612e-01,
        -2.6213e-01,  3.4489e-01,  5.1371e-02, -1.3282e-01,  7.1912e-01,
         5.3458e-01, -2.5097e-01,  5.5607e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0691e+00,  1.0221e+02,  3.0848e+00, -5.9408e-01,  4.5012e-01,
        -9.5937e-01, -1.3595e+00,  3.5303e-01, -2.9964e-01,  1.2094e+00,
        -4.0734e-01, -2.2539e-01, -4.1708e-01, -2.0252e-01, -2.1092e-02,
        -5.7327e-01, -2.5265e-01, -1.2465e+00, -1.9483e-01,  5.6626e-02,
         2.7950e-01, -1.2264e-01, -3.1514e-01, -2.5838e-01, -1.3459e-01,
         4.2312e-02, -7.5545e-01, -4.1098e-01, -6.9245e-01,  3.8336e-02,
        -2.4108e-01,  1.6237e+00,  3.5825e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1263, 42.3651, -1.4306, -1.1517, -0.8825, -0.1944,  0.4594, -0.2509,
        -0.1059, -0.1252,  0.2174,  0.1199, -0.9460, -0.2117,  0.2917, -0.2452,
         0.1919,  0.8957,  0.3964,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9083e+00,  1.4374e+02, -5.0513e+00,  8.3199e+00, -7.4536e-01,
         1.2475e+00,  1.6762e+00, -1.3834e-01,  1.1501e+00, -9.5293e-01,
         4.9270e+00, -4.9476e+00,  2.5071e+00, -8.5331e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5119e-01,  1.0683e+01,  3.8224e-01, -3.0675e-02,  3.7808e-02,
         2.9926e-02,  1.5356e-02, -6.6484e-02,  2.6322e-02,  3.5505e-02,
        -8.5299e-03,  2.3674e-02,  6.4635e-02, -2.3575e-01,  2.0401e-02,
         3.0000e-02,  1.7803e-02,  3.6882e-02,  3.7878e-02,  1.2083e-02,
         8.7300e-03,  1.6900e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4385e-02, -1.4484e+01, -1.7354e-02, -1.6337e-01,  5.4542e-03,
         6.3927e-02,  1.7810e-01, -6.1174e-02,  6.9964e-02,  2.7896e-02,
         2.3004e-02,  5.4143e-02, -3.4401e-02,  5.5976e-02, -9.9962e-02,
         2.2903e-02,  1.2225e-01,  3.1732e-02, -1.1102e-02, -2.1978e-02,
         1.8546e-02, -1.8912e-02, -1.8828e-03,  3.4560e-02,  6.1453e-02,
        -5.4388e-02,  6.9579e-02,  4.5674e-02, -5.2078e-02,  4.6277e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6364e-02,  2.5849e+00, -3.5569e-02, -3.0435e-02, -1.5946e-03,
         2.3776e-02,  7.2499e-02,  2.3822e-02, -2.1040e-02, -2.6520e-02,
        -5.6004e-02, -2.2052e-02,  2.0040e-02,  2.3238e-02,  1.5915e-02,
        -2.9426e-02, -3.6285e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1264,  1.2600,  0.0795,  0.0607, -0.0160,  0.0078, -0.0280, -0.0058,
         0.0689, -0.0332, -0.0094,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5612e-01, -3.7397e+01,  4.6827e-01,  5.7771e-01,  2.8821e-02,
         1.6500e+00,  1.0729e+00,  1.2203e-01,  3.1221e-02, -1.9203e-01,
        -1.5133e-01,  3.2186e-01,  9.9347e-02, -1.1679e+00, -4.6645e-01,
         1.9199e-01,  3.4089e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9749e+00,  1.3787e+02, -6.8223e+00,  3.7796e+00, -7.5581e-01,
         1.4078e+00, -9.5963e-02, -2.4723e-01,  8.3926e-01, -9.5495e-02,
        -9.1636e-01,  1.0388e+00,  3.8373e-01,  3.5419e-01,  1.8746e+00,
         4.7774e-01, -3.6648e-02, -9.9356e-01, -2.4774e+00,  2.4738e-01,
        -3.3347e-01,  2.7215e-02,  3.4153e-01, -3.1675e-01, -3.4830e-01,
         2.4450e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1379e-01,  7.2809e+00,  2.1486e-01, -1.2006e-01,  8.7304e-03,
         1.2357e-01,  8.1410e-02,  7.8579e-02,  2.8544e-02, -2.6717e-02,
        -3.0334e-02, -6.4065e-03, -5.2041e-03,  3.3903e-02, -1.0138e-02,
        -6.6656e-02, -5.1993e-02, -4.3127e-02,  1.5324e-02, -3.4774e-02,
         7.1449e-02, -8.1244e-04,  2.0039e-02, -7.0662e-03,  5.1523e-02,
        -1.6060e-03,  2.7130e-02, -9.4912e-03, -4.5558e-02,  1.6200e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1454e-01,  1.2074e+02, -3.2010e+00,  8.0280e-02,  3.0703e+00,
        -1.0873e+00,  7.3560e-01,  9.6114e-01,  1.5755e-01,  2.6827e-01,
        -7.5555e-01,  3.6136e-01, -8.4936e-01,  4.2233e-01,  9.3061e-02,
         1.3845e-01, -1.9474e+00, -8.4650e-01,  1.3037e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-9.5905e-01,  9.1902e+00, -1.9109e-01,  8.7970e-02,  4.1745e-03,
        -2.2553e-02,  1.7218e-01,  1.4124e-02,  1.0253e-01,  2.8608e-02,
         1.2112e-02,  5.0799e-03,  3.1007e-02,  3.9229e-02, -9.3824e-02,
         4.0802e-02,  9.8388e-02,  2.1754e-01,  1.5667e-02,  6.0923e-02,
         4.3254e-02, -4.0631e-02, -4.6766e-03, -2.2196e-02, -1.1966e-01,
        -8.0719e-04,  1.8613e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1279, 49.5195,  1.1977,  0.1790,  0.4981,  0.5133,  0.1212,  0.0885,
         0.3540,  0.0826, -0.1470,  0.5239, -0.7974, -2.3830,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6237e-01,  1.0617e+02, -2.3517e+00, -8.3513e-01, -6.7744e-01,
         1.7442e-01,  8.4322e-01,  1.1120e+00,  2.6627e+00,  1.2435e+00,
        -2.7049e-01,  6.0725e-01, -1.7836e-01,  6.3593e-01, -1.3658e-01,
        -9.7253e-01,  5.3577e-01, -7.2233e-01,  5.9582e-01,  4.3127e-01,
        -6.9216e-02,  1.8444e-01, -3.9235e-01, -1.5880e+00, -3.0900e-01,
         8.4798e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4875e-01,  8.6837e+00,  1.1276e-01,  4.7902e-03, -2.5633e-02,
        -1.6915e-01,  5.4988e-02, -5.5719e-02, -1.1057e-02, -2.1445e-02,
         5.5949e-03, -3.7917e-02, -5.8203e-02, -2.1639e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4207e-01,  7.3907e+01, -2.6138e-02, -1.8737e+00, -2.5302e-01,
         3.0158e-01,  2.2496e-01,  2.1293e-02,  2.3004e-01,  3.2387e-01,
        -1.4678e-01, -4.0495e-04,  2.7941e-01, -1.9024e-01,  8.0490e-01,
         4.1361e-01, -4.1915e-01,  6.9239e-01, -8.3399e-02, -6.2121e-01,
         1.0528e-01,  4.1179e-01,  8.1603e-01,  3.7572e-01, -8.2523e-02,
         2.0186e-01, -6.6047e-01,  7.5164e-02,  3.2736e-01,  6.5157e-01,
         1.8991e-01,  2.3407e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8711e+00,  7.4116e+01, -5.8500e-01, -2.4319e+00, -1.4533e-01,
         1.3960e+00, -1.7068e+00, -2.9080e-01,  3.5540e-01,  8.5746e-02,
         3.2765e-01, -1.9778e-01, -3.7379e-01,  2.3297e-01,  9.2778e-01,
         2.0648e-01,  4.6192e-01, -3.9135e-01,  2.6515e-02,  3.3474e-01,
        -2.1391e-01,  3.0185e-01, -1.5030e-01, -2.8203e-01, -2.6049e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0500,  1.5134,  0.0462, -0.0108, -0.0323,  0.0396,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0404e+00,  2.2709e+01,  1.6239e-01,  3.4265e-01,  4.7047e-01,
         5.1658e-02,  1.4548e-01, -5.5773e-02,  2.5914e-01, -1.5549e-01,
        -6.6115e-02,  2.0144e-01,  3.5268e-02,  4.3360e-01, -1.1092e-01,
         1.3276e-01,  6.5487e-02,  9.8313e-02,  4.9329e-02,  1.7157e-01,
        -1.2393e-01,  1.9731e-01, -1.4733e-01, -3.0603e-03, -9.0151e-03,
        -7.8915e-02, -6.4526e-02,  2.3117e-01,  1.7500e-02,  3.4991e-01,
         8.4206e-02,  4.4426e-02,  4.5513e-02, -7.1188e-02,  2.0851e-02,
        -3.7923e-02,  3.8529e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6356e-03, -9.9766e+00, -2.8845e-01, -2.8786e-01, -1.5426e-02,
        -1.4521e-01, -4.1663e-02, -6.3511e-02,  1.9825e-02,  1.4651e-02,
         1.2288e-02, -2.2914e-02, -1.7383e-02, -6.8742e-02,  3.9766e-02,
         6.7835e-02,  8.5888e-02, -5.8112e-02,  6.1503e-03, -1.0814e-01,
        -2.1988e-02, -1.0554e-02,  7.6090e-02,  3.2101e-02, -2.5784e-02,
         1.7396e-02,  3.4041e-02, -1.3938e-01,  4.4580e-02, -4.9072e-02,
         2.1210e-02, -2.0329e-02, -5.0933e-02,  3.5218e-02,  4.6862e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8994e-01,  6.1855e+00,  2.1629e-01,  3.6606e-01,  5.2235e-02,
         1.8391e-01, -1.1002e-01,  8.9284e-02, -5.7003e-03,  2.6011e-01,
         1.0194e-01, -1.1521e-01,  4.8650e-02, -2.7388e-01, -2.5224e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0817e+00,  3.2349e+01, -3.9986e-02,  2.6284e-01, -3.0938e-01,
        -1.8300e-01,  7.7653e-02,  7.9078e-01,  1.8974e-01,  6.3345e-02,
         7.7406e-02, -1.5910e-01,  3.1682e-01,  4.0506e-02,  1.7779e-02,
         6.9056e-02,  5.4912e-03, -5.4451e-01, -1.1918e-01, -2.0108e-01,
         1.0084e-01,  8.6988e-03, -1.9547e-01,  4.6037e-02, -9.7884e-02,
         2.1274e-01, -5.7784e-02, -4.4059e-02, -1.0746e-01,  7.5006e-02,
         1.2383e-01,  3.4467e-01, -3.6979e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4528e-01, -5.9319e+00,  1.0016e-01,  1.0998e-02, -3.3642e-04,
         9.3515e-02,  6.6569e-02,  7.0222e-03,  8.4623e-02,  1.0413e-02,
         3.9600e-02, -1.0016e-01,  3.7411e-02,  1.8275e-02, -9.9551e-02,
         2.4122e-02,  2.2837e-02,  1.2020e-02, -2.3276e-03, -1.6239e-02,
         1.0364e-01, -7.8825e-03,  1.4702e-02,  3.8080e-03, -1.3269e-02,
        -5.8856e-02,  5.6796e-05,  5.0576e-02,  6.1892e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.0389e+00,  4.2717e+01, -4.9033e-01, -4.3620e-01,  4.5534e-01,
        -3.6367e-01, -1.3326e-01, -3.9210e-01,  1.4397e-01, -4.5399e-01,
         5.1771e-02, -3.6672e-02,  2.8242e-01, -1.3247e-01, -7.8792e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2491e-02,  3.1603e+01, -3.7577e-01,  2.7693e-01, -4.2871e-01,
         3.0954e-01,  4.0184e-02, -1.0717e-01,  1.4349e-01, -8.7794e-02,
        -9.4243e-02, -1.8195e-01, -1.0367e-01,  3.8620e-02,  3.2492e-02,
        -2.3059e-01, -3.7637e-02,  4.7093e-02, -3.2250e-02, -1.1629e-01,
        -5.5914e-02, -1.3631e-01, -2.2129e-01, -4.7094e-03, -1.8839e-02,
        -3.1265e-04,  1.5153e-02, -4.1581e-03, -1.7147e-01,  7.2088e-03,
         8.3955e-03,  1.5684e-02, -1.9133e-02, -4.7312e-02, -1.0309e-01,
         1.0560e-01, -1.0629e-01,  1.0103e-01, -9.4159e-02, -6.1325e-02,
        -1.2302e-01,  6.5970e-02, -2.4304e-02,  5.3242e-03,  6.1577e-02,
        -3.7258e-01, -1.7716e-01,  7.0111e-03,  3.6482e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8223e-02,  2.1601e+01,  3.1586e-01, -1.2439e-01,  1.4882e-02,
         5.0010e-02,  1.0749e-01,  5.3044e-02, -3.7111e-01,  1.1030e-01,
         1.0211e-01,  4.3447e-02, -1.9293e-01,  5.8253e-02, -2.6569e-02,
         4.3090e-03,  9.7449e-02,  3.9828e-02,  1.1496e-01,  7.2413e-02,
         8.0379e-02,  9.5695e-02,  1.0707e-01,  1.2656e-01,  1.2534e-01,
        -1.8941e-01, -4.2012e-02,  5.4289e-02,  4.6098e-02, -4.2593e-02,
         8.4834e-02,  9.7789e-02, -1.1577e-01,  1.9279e-01, -2.7051e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1794e+00,  1.2745e+02, -4.2598e-01, -9.9522e-01, -1.3771e+00,
         8.7164e-01, -7.6732e-02,  1.7496e-01, -6.9538e-01, -3.2607e-01,
        -1.1037e-01,  3.4153e-01, -1.2678e+00, -1.9283e-01,  5.8769e-01,
         7.7968e-02,  4.5411e-01, -1.6882e+00,  1.5287e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0883e-01, -9.6424e+01, -8.3742e-02, -4.5865e-01, -6.7597e-01,
        -1.8632e+00, -4.1639e-01, -4.0228e-01, -1.0864e+00, -3.9371e-02,
         3.4818e-01, -2.2323e-01, -4.4250e+00,  2.9870e+00,  2.1689e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7862, 25.6684, -0.5844,  0.1519,  0.1242, -0.1832,  0.3095,  0.1666,
        -0.0468, -0.0927, -0.1213,  0.0498, -0.2189, -0.1047, -0.0664, -0.1364,
        -0.1159, -0.1416, -0.2311,  0.1199,  0.0392, -0.1634,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5429e-01,  1.7300e+01, -4.2778e-01, -3.0780e-01, -2.5791e-01,
        -3.1820e-01, -1.7385e-02,  1.9987e-01,  1.0022e-02,  1.5167e-02,
        -1.1850e-01,  9.0203e-02, -1.0948e-01, -1.8345e-02, -6.6017e-02,
        -1.3387e-01,  1.1138e-01,  1.7445e-01, -5.1289e-02, -7.5928e-02,
         1.5788e-02,  1.1952e-02,  3.9044e-02,  1.1260e-01, -1.9091e-01,
        -2.8262e-02,  1.1069e-01, -9.4089e-03, -9.9346e-02,  1.0042e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9508e+00,  1.6067e+01, -1.6513e-01, -4.8472e-01, -5.4762e-01,
        -1.2534e-01,  5.6527e-03,  1.8059e-02, -3.3397e-01, -3.3787e-01,
         4.0715e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4594e+00, -4.5680e+01, -2.6413e-01,  5.4239e-01,  3.4459e-01,
         3.0482e-01,  2.3554e-01,  1.5010e-01, -1.8405e-01, -4.5122e-02,
         2.5674e-01,  4.8819e-01,  2.5809e-01, -5.0726e-02, -3.3138e-02,
         1.3713e-01, -4.1587e-02,  1.4378e-01,  7.5164e-02,  4.3040e-01,
        -1.2217e+00,  1.6364e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8533e-01,  1.3940e+01, -5.5702e-01,  3.0485e-02, -7.7413e-02,
         7.0534e-02, -5.3392e-02,  7.9648e-02, -1.9864e-01, -5.7619e-02,
        -1.5255e-01, -1.4848e-01, -5.6414e-02, -1.3373e-01,  3.1136e-02,
        -6.4850e-02, -1.2006e-01, -3.1154e-02,  6.4095e-02, -3.3573e-02,
         2.3175e-02, -4.6188e-02, -3.9694e-02, -1.9877e-02, -1.2792e-01,
        -8.0951e-02,  2.7709e-02, -5.8888e-04, -9.2125e-02, -3.0666e-02,
         2.2823e-03,  3.9072e-02,  3.1007e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.3787, -89.3082,  -2.2126,   0.4328,   0.3669,  -2.0807,  -0.0916,
          1.6999,   0.1048,   0.1079,  -0.0895,   0.3098,   0.3975,   0.3971,
          0.6835,  -1.0600,  -0.2213,   0.1100,   1.1337,  -0.3594,  -0.3406,
         -0.1002,   0.5194,   0.5603,   0.2686,  -0.1501,   0.1643,   0.1340,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3056e-01, -2.1316e+01, -2.7953e-01,  4.0329e-01, -3.0922e-01,
         5.7479e-02,  2.5951e-01,  1.0529e-01,  4.4184e-01,  3.3616e-02,
         2.7153e-01, -9.6151e-02, -1.1060e-03,  2.8272e-02, -1.3579e-01,
         3.2060e-02, -7.2003e-02,  6.3826e-02, -4.9742e-02, -1.2002e-02,
        -5.6979e-02,  4.5850e-03,  1.9342e-02, -1.1564e-02,  2.1159e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.1166e+00,  3.4444e+01,  3.3776e-02,  1.1560e-01, -1.2291e-01,
         4.1373e-02, -8.8877e-02, -2.1005e-02,  5.9378e-01,  4.0025e-02,
         3.3950e-01,  1.0050e-01,  1.6822e-02,  3.8255e-02, -1.4719e-01,
        -8.9040e-01, -1.0343e-01,  2.5969e-03, -1.8504e-02,  1.4887e-01,
         5.6125e-02, -1.0128e-01, -4.3035e-02,  3.7665e-02,  1.7796e-01,
        -1.3151e-01, -3.3415e-02,  2.7693e-02, -1.2273e-01, -8.4343e-02,
        -2.5214e-02, -2.5272e-01, -1.8368e-01, -4.5686e-02, -5.6395e-02,
         8.0888e-02,  7.6404e-03,  3.8419e-02,  5.5440e-02, -1.8514e-01,
        -7.9364e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0287e-01,  1.1120e+01,  4.7480e-02, -2.1203e-01,  1.8506e-01,
         7.7525e-02, -8.6058e-02,  4.0457e-02, -4.2595e-02,  4.5523e-02,
         3.9636e-02, -2.8901e-01,  7.2668e-02,  4.2450e-04,  1.1823e-02,
        -1.2414e-02,  1.6405e-02, -5.0607e-02,  2.6555e-02,  6.9234e-02,
        -2.5037e-02,  6.5086e-02,  4.7378e-02, -1.4121e-01,  2.6514e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1051, 14.5061, -0.1081, -0.1645, -0.1425, -0.0576, -0.0382, -0.0320,
        -0.0489,  0.2061,  0.0421,  0.1157, -0.0361,  0.0996, -0.1120, -0.0747,
         0.0499,  0.0607, -0.1409, -0.0927,  0.2366,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0802e-01,  8.2980e+00,  3.2829e-01, -1.9360e-01,  2.8398e-02,
         3.1014e-02, -7.8536e-02, -1.6773e-02,  1.9048e-02,  4.3381e-04,
        -7.5313e-02, -1.1997e-01,  5.0284e-02, -9.0833e-02, -7.3864e-02,
         5.3572e-02, -1.4717e-02,  1.0425e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1161e-01,  3.7679e+01, -9.3300e-01,  2.4056e-01, -3.9976e-01,
         3.3559e-02, -9.6683e-02, -2.9181e-01, -2.3137e-01, -1.5319e-01,
        -1.7316e-01, -1.4799e-01,  1.6142e-01,  1.1814e-01, -8.1550e-02,
         5.5353e-01,  8.2852e-02,  3.8590e-01, -9.7693e-03, -2.7194e-01,
         4.3113e-02,  1.4874e-02,  2.9900e-02,  7.4470e-02,  4.9028e-01,
        -1.2822e-01, -2.3844e-01, -8.3707e-02, -3.7905e-01, -2.4271e-01,
         2.6433e-01,  8.0596e-02,  2.7381e-01,  6.7587e-02,  1.3297e-01,
         1.8785e-01, -1.2878e-01,  1.3614e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9885e+00, -1.3693e+02,  6.0073e+00, -1.3069e+00,  7.3080e-01,
        -1.7831e-01, -5.3336e-02,  8.2351e-01,  4.3549e-01,  6.9642e-01,
        -1.1029e+00,  4.1551e-01, -3.8317e-01, -6.6830e-01,  4.0442e-01,
        -3.1108e-01,  3.3481e-01,  3.0592e+00,  4.3817e-01, -1.3452e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5189e-01,  3.0712e+01,  8.0664e-01,  4.2219e-01,  1.4727e+00,
         4.5865e-01,  1.1617e-01,  7.9437e-01,  3.2649e-01,  3.1515e-01,
        -6.9779e-01, -2.3552e-01, -3.8132e-01, -2.3435e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7536e-02,  1.8530e+00,  4.0192e-02,  5.7916e-02, -7.1708e-03,
        -1.7029e-03, -6.5315e-03,  7.7535e-04, -2.0301e-03, -1.3423e-02,
        -1.2594e-02,  1.6612e-03,  4.7327e-03,  1.1588e-02,  1.7250e-02,
         1.2519e-03, -1.3605e-02, -7.0980e-03,  2.3444e-03,  4.4874e-03,
        -2.0937e-03,  4.5732e-02,  5.5097e-03,  2.7366e-02, -6.0614e-03,
        -3.2097e-04,  1.0867e-02,  2.5317e-03, -2.2224e-03,  1.0186e-02,
        -1.2739e-03, -1.0431e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9502e+00, -4.9248e+01,  4.3038e-02, -1.0284e-01,  1.9604e-01,
        -1.2565e-01, -2.7025e-01,  1.0316e-01,  4.1295e-01,  1.8447e-01,
         1.4053e-01,  2.4389e-01, -1.7642e-01, -2.2106e-02, -2.5976e-01,
         5.2170e-02,  2.0289e-01,  1.8112e-01,  8.5673e-01, -5.6957e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2565e+00,  1.9389e+01, -8.7819e-01, -5.9380e-02, -3.2633e-02,
        -1.1063e-01, -1.2628e-01, -2.5939e-01, -3.6442e-02,  7.9450e-02,
        -1.1239e-01, -3.0782e-03, -1.7733e-02,  3.2071e-02,  8.2477e-03,
        -1.4832e-02, -7.0536e-02,  2.1625e-01, -1.0095e-01,  3.2538e-02,
         1.0517e-01, -1.4162e-01, -1.4038e-02,  1.5731e-01, -1.8818e-01,
        -2.4207e-01, -3.6885e-01, -1.2152e-01,  7.7197e-02, -8.0766e-02,
         2.0396e-02,  3.0684e-02,  1.7345e-01, -3.6954e-02,  2.0137e-01,
         1.9498e-04, -1.3652e-01,  2.2825e-03, -1.0477e-02,  3.8806e-02,
        -1.3765e-02,  1.4608e-01,  1.1151e-01, -3.1521e-02,  1.9183e-02,
        -3.9855e-01,  1.5540e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0746e-01,  5.4730e+01, -8.3578e-02,  4.3607e-01,  2.2060e+00,
         1.3681e-01,  4.8120e-01, -1.6827e-02,  4.6475e-01, -5.8935e-01,
        -1.4465e+00,  7.2613e-01,  8.9388e-01,  1.2511e+00, -5.6864e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1360e-02,  3.6730e+00,  6.6674e-02,  1.6111e-02,  1.5780e-02,
         7.8800e-03, -2.4540e-02,  7.7337e-02,  8.7172e-04, -1.0489e-02,
        -3.4468e-03,  3.6725e-03, -2.3764e-02,  5.0735e-02, -3.7539e-02,
         3.2791e-02, -2.1119e-02,  8.6544e-03, -4.7976e-02, -2.6612e-02,
         5.0665e-04, -1.6100e-02,  5.6081e-03,  8.5502e-03, -1.1934e-02,
         3.3628e-03,  5.1750e-02, -1.6719e-01, -1.5937e-02, -1.1777e-02,
        -1.1147e-02,  7.7456e-03,  6.9768e-03,  6.0601e-04,  2.4680e-02,
         1.8645e-02,  8.8641e-03,  8.6910e-03, -2.3627e-02,  4.1293e-03,
        -6.5750e-03,  1.1011e-02, -8.5226e-03, -1.0187e-03, -4.2204e-03,
        -9.4445e-03,  2.1844e-02,  8.5358e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 1.2405e+00,  2.4257e+01,  2.7673e-01, -2.6132e-01,  2.4724e-01,
        -4.8608e-01,  5.9867e-02,  4.3553e-02, -2.3361e-01, -3.1433e-01,
         4.8698e-02, -4.6366e-02, -2.3016e-01,  1.7608e-02, -7.4096e-02,
        -6.6457e-02, -5.3611e-01,  4.2082e-02,  3.9082e-02, -8.0742e-03,
         6.0578e-02,  6.3149e-02,  3.5803e-02, -1.0291e-01,  6.4138e-01,
         3.2367e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6597e-01, -2.5916e+00,  3.2400e-02, -6.6028e-03, -6.7604e-03,
        -3.9644e-03,  2.1951e-02, -2.3164e-02, -1.9471e-02, -1.1180e-02,
        -4.7956e-02,  1.5148e-02, -6.8182e-04, -8.2236e-03,  1.2249e-02,
         1.5359e-02, -2.6147e-02,  8.1990e-03, -3.3676e-02,  2.3745e-02,
        -2.6851e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6337e+00, -1.0640e+02,  6.3978e-02, -1.7198e+00,  1.4972e+00,
         7.5741e-01,  4.0976e-01, -5.8653e-02,  7.0742e-01,  7.7625e-01,
         5.7568e-03, -5.0885e-02,  1.1141e+00,  1.6636e-01, -1.0323e+00,
        -8.6689e-01,  7.0604e-01, -3.6786e-01, -7.7761e-01, -4.0318e-01,
         1.4269e-01,  2.3897e-01, -8.2068e-01,  3.6285e-01, -1.1700e+00,
         1.9671e-01,  5.4352e-02,  2.0316e-01, -1.0796e-01, -2.2099e-01,
         2.5828e-01, -1.8502e-01, -1.3582e+00,  4.6612e-01,  4.8265e-01,
         6.0329e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0340, 11.9980, -0.3322, -0.2902,  0.0634,  0.4036, -0.3979,  0.0650,
        -0.1596,  0.1711,  0.1855,  0.0731, -0.0599, -0.1915, -0.0569, -0.0740,
         0.0795, -0.0887,  0.0510, -0.3512,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5794e-01, -1.2462e+01,  6.2165e-01, -1.8696e-01,  4.5334e-02,
         6.7788e-02,  5.8924e-02, -1.2595e-01,  3.4919e-02,  9.0167e-02,
        -3.4867e-01, -1.6124e-01,  1.4946e-01,  1.1632e-01, -1.8684e-01,
        -8.4804e-03,  5.4298e-03, -7.4510e-02,  1.4584e-01, -3.9493e-03,
        -3.6956e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -4.6084, -73.1731,   1.8666,   0.3557,   0.9532,   0.6203,   1.1591,
          0.8909,  -0.2689,   0.3382,   0.3157,  -1.3213,  -4.0408,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9264e+00, -1.2842e+02, -3.6935e+00, -6.6615e-01, -3.0107e-01,
         7.6190e-01,  8.4613e-01,  5.8099e-01,  2.1641e+00,  2.9857e-01,
         2.7291e-01,  4.0709e-01,  2.2862e-01,  3.5800e-02, -5.8803e-01,
         7.1898e-01, -8.8955e-02,  1.6811e-01,  5.2012e-01,  3.2958e-01,
        -8.9390e-01, -4.7123e-01,  2.2680e-01,  2.7570e-01, -5.1729e-01,
         4.8978e-02, -9.6060e-02, -4.0628e-01,  3.4072e-01,  4.5968e-01,
        -9.2993e-03, -1.9844e-01, -9.4909e-01,  3.4825e-01, -1.1558e+00,
         7.1523e-01,  5.1588e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8526e-01, -1.4454e+02,  2.9354e+00,  4.5473e-02,  2.8235e+00,
         8.6644e-01,  6.8513e-01,  6.5849e-01,  6.7090e-01,  6.9242e-01,
         9.3886e-01,  1.5619e-02, -5.2266e-01, -4.3179e-01,  2.0059e-01,
         1.0340e+00,  3.3757e-01, -7.5478e-01,  2.0096e-01, -2.9498e-01,
         7.6628e-01,  1.1539e+00,  4.6505e-01, -3.7131e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0344e-02,  1.7175e+01,  7.4048e-01, -1.1140e-01,  1.4509e-01,
        -1.7019e-03, -6.6202e-02,  2.6108e-01,  4.5144e-02,  1.6322e-03,
        -5.5847e-02, -1.0644e-01,  5.4826e-02, -9.5558e-02,  1.5047e-01,
        -5.0048e-02, -9.6527e-03,  3.7329e-02, -1.3488e-01,  5.5574e-02,
         9.5205e-04, -7.8348e-02,  2.5666e-02, -2.6506e-02, -6.9768e-03,
        -1.8514e-02, -4.7314e-02, -2.5688e-02,  7.1800e-02,  1.1688e-01,
        -2.2290e-01, -3.0945e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4070e+00, -1.1714e+02, -2.0823e+00,  1.4419e+00,  1.7840e+00,
         2.3905e+00,  3.7775e-02,  5.0722e-01, -1.2372e+00, -4.0957e-01,
         2.3478e-01,  4.2092e-01,  1.9413e-01, -2.6931e-01,  4.0120e-01,
        -9.1176e-01,  9.2214e-01,  4.7674e-01,  2.0611e-02, -1.8023e-01,
        -6.9259e-01, -4.0930e-01, -1.2878e-02,  2.4846e-02, -7.4142e-02,
         5.7789e-01, -2.7120e+00,  2.4966e-01, -1.0343e+00, -2.3089e+00,
         2.7515e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1510e-01,  4.4810e+01,  1.9646e+00,  6.2382e-01, -1.3495e-01,
        -6.4259e-01,  1.7991e-01, -4.7415e-01,  6.8687e-01,  3.9763e-02,
         1.1475e+00, -2.9359e-01, -2.5690e-01, -8.8083e-01, -2.5619e-01,
         3.3200e-01, -3.3212e-02, -2.3699e-01, -5.1714e-02,  3.3487e-02,
        -6.4769e-01,  2.7147e-01,  7.4255e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0939e+00, -1.1599e+02, -1.0388e+00,  2.2017e+00, -1.1245e+00,
         1.0049e-01, -2.2547e-01,  1.9494e+00, -4.1079e-02,  1.7628e-01,
         2.3718e-01,  2.4775e+00,  8.0221e-01,  1.1980e+00, -8.6005e-02,
        -1.8534e-01,  7.7278e-01,  8.3236e-01, -9.0984e-02, -6.0307e-01,
         3.2227e-01,  7.4086e-01, -1.7796e+00, -4.7005e-01, -1.1768e-01,
         3.5348e-01, -5.4094e+00, -2.2750e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.7938e-01, -1.4267e+01,  1.6713e-01, -1.3033e-01, -1.2619e-01,
        -7.7077e-02, -8.4717e-02,  4.1367e-02,  3.9124e-02,  9.2610e-02,
         9.6770e-02,  4.5326e-02, -4.5566e-02, -2.4583e-02, -3.3084e-02,
        -9.3137e-02,  4.1503e-02, -3.6596e-01,  6.2464e-02, -1.1369e-02,
        -1.4565e-01, -1.7804e-02,  1.6129e-01, -1.0310e-02,  7.5565e-02,
        -5.7741e-02,  9.0447e-03,  1.4115e-01, -4.4303e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2120e+01, -1.4144e+02, -1.7250e-01, -1.6795e+00, -2.9498e+00,
        -1.4192e+00,  2.0539e-01,  1.1246e+00, -2.4084e+00, -2.8781e+00,
        -1.6757e+00,  3.8717e-01,  2.6970e-01,  9.5059e-01, -2.3835e-01,
        -6.7518e-01, -1.3311e+00,  4.0359e-01, -3.3614e-01, -3.4271e-01,
         1.6663e-01,  1.3015e+00,  4.1145e+00,  2.7885e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2543e+00, -6.4193e+01,  4.6072e+00,  1.6954e+00, -6.0626e-01,
        -5.9239e-02, -3.0513e-01,  2.2651e-01, -6.4542e-01,  1.8583e-01,
         4.6873e-02, -5.0735e-01, -1.2147e+00, -1.0269e-01,  2.8548e-01,
        -1.9487e-01, -3.3312e-01, -1.3507e-02, -6.0747e-01, -7.1242e-02,
        -3.0785e-01,  5.5048e-01,  1.6029e-01, -3.7565e-01, -5.4927e-01,
         2.2313e-01,  1.2323e-01, -1.9934e-03,  1.7483e-01, -1.8195e-01,
        -4.4402e-01,  3.7918e-01, -9.4236e-01, -9.3957e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.9507e-02,  1.9919e+01, -1.2011e+00, -3.3738e-01, -1.9408e-02,
         2.0744e-01,  3.5835e-01,  1.8798e-01,  1.0191e-01,  4.1991e-02,
         3.0186e-02, -1.4734e-01, -5.6757e-02, -7.0415e-02, -1.4484e-02,
         5.1649e-02, -8.2424e-02, -8.4880e-02, -1.4544e-01,  5.9966e-02,
        -1.9044e-01,  3.2699e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8629e-01,  3.1366e+01,  4.0859e-01,  1.3742e-01,  7.9283e-01,
        -5.2886e-01, -3.6308e-02,  1.9309e-02, -2.3732e-01, -3.9273e-02,
         4.2507e-01, -2.7407e-01, -1.1703e-01,  1.1424e-01, -9.7329e-02,
        -2.9934e-01, -2.9601e-02,  3.5312e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2886e+00, -8.0539e+01,  2.9966e-03, -4.2451e-01, -1.3536e-01,
         3.4868e-01, -4.6453e-01,  5.0973e-01, -1.0596e+00,  1.4050e-01,
        -1.8915e-01, -2.5316e-01,  1.4164e+00,  1.7943e-02,  3.1067e-01,
         3.7644e-01, -7.3425e-02,  1.3858e+00, -1.4766e-01,  4.7568e-01,
         7.1894e-01, -1.3189e+00,  2.2402e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1814e+00, -1.0316e+02, -1.5041e+00, -1.2810e+00,  1.9866e+00,
         3.4179e-01,  1.1916e-01,  3.2794e-01, -3.3542e-01,  1.2439e-01,
         1.2109e+00,  2.8836e-02,  1.8435e-01, -9.1038e-01,  1.1211e+00,
         1.0616e+00,  7.2253e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  3.8246, -28.8764,  -0.2825,  -0.0843,  -0.1459,  -0.1384,  -0.1450,
          0.3068,  -0.1147,   0.0804,   0.0897,   0.1245,   0.1709,  -0.0432,
          0.1768,  -0.4158,   0.4065,  -0.3503,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3383e-01, -2.9897e+00,  1.9286e-01,  6.0320e-02, -9.5128e-03,
         1.9046e-03, -1.3614e-02,  1.9970e-02,  1.6974e-02, -1.0272e-01,
        -1.2616e-02,  3.1031e-02, -3.4688e-02, -1.1519e-04,  5.1260e-03,
         2.1739e-02,  9.1493e-02, -4.0273e-02, -2.3574e-02, -1.6965e-02,
         2.4258e-02,  1.0181e-02,  1.0481e-02,  1.2826e-02,  1.1860e-02,
         1.2398e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0712e-01,  3.9793e+01,  1.5254e-02,  2.6020e-01, -1.4131e-01,
        -4.1231e-01,  9.1696e-02,  1.4382e-01,  3.9794e-01, -3.9861e-01,
        -1.0258e-01,  9.4191e-03,  1.6787e-02, -2.0848e-01, -2.2717e-01,
         8.3690e-02,  1.2153e-01,  1.9334e-02,  2.1757e-01, -1.5304e-02,
         1.3571e-01, -2.2892e-01,  1.9029e-01, -1.0878e-01, -4.3804e-02,
         7.8422e-02, -1.5063e-02, -1.0191e-01,  1.4941e-01,  5.0844e-01,
        -1.7901e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.9446, -89.0699,   0.2536,   0.3960,   0.2931,  -0.2219,   0.2379,
         -0.3150,  -0.6182,  -0.1771,   0.1991,   1.0022,   0.5763,   0.5520,
         -0.1815,  -0.2833,   0.1904,   2.3543,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1130,  7.3884, -0.1067, -0.1792, -0.0172, -0.0419, -0.0477,  0.3036,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-9.9132e-02,  6.7783e+00,  1.4204e-01, -3.4100e-02, -1.0398e-01,
         4.1166e-02, -4.0913e-02, -2.6832e-05,  9.0014e-04,  4.6912e-02,
         4.5083e-03, -7.7355e-02,  1.3873e-02, -1.8453e-02,  1.0549e-02,
         1.2300e-02,  1.2747e-02, -1.9040e-02,  2.6324e-02,  2.9932e-02,
         3.0190e-02,  3.1134e-02,  9.1164e-03, -3.9629e-02, -5.0691e-03,
         1.8946e-02, -2.9358e-02,  2.6572e-02,  4.2125e-03,  3.9227e-03,
         3.8630e-03,  1.6294e-02,  1.6264e-02,  3.6581e-02,  1.2711e-02,
         3.8563e-02,  1.4902e-02,  7.7034e-02,  5.5007e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5282e-01, -5.6477e+00,  6.4722e-02, -9.5518e-03, -4.5562e-03,
         1.1472e-02,  5.3219e-02,  3.8474e-02,  1.5321e-02, -2.3216e-02,
         7.7801e-03, -3.3353e-02, -2.8858e-02,  5.8889e-02, -9.1878e-03,
         7.2643e-04, -1.6833e-03,  5.7634e-02, -2.6887e-02, -5.5119e-02,
        -3.3047e-02, -3.3194e-02,  1.3280e-02,  3.1073e-03,  3.8769e-03,
        -1.8105e-02,  4.5166e-02,  4.2014e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0312e+01,  1.0914e+02, -3.7780e+00,  9.7200e-01, -1.8362e+00,
         1.4282e+00, -1.5894e+00, -1.5705e-02,  4.7174e-01,  4.3069e-01,
         7.1713e-02, -1.6367e+00, -1.7415e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9718e-02, -4.3751e+00,  7.5068e-02,  5.4541e-02,  4.4087e-03,
        -3.2218e-02,  1.7475e-02, -3.0159e-02, -3.9360e-02, -3.1166e-02,
        -3.5023e-03,  6.3167e-02,  2.3947e-03,  1.0788e-03,  5.4700e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1159e+00, -6.0695e+01, -1.9729e+00,  5.1462e-01, -8.1055e-01,
         3.0330e-01, -6.1878e-01, -1.6132e-01,  4.9133e-01,  3.6900e-01,
         9.1885e-02,  1.6933e-02,  8.4060e-01, -8.6253e-01,  8.5112e-02,
         1.4231e-01, -1.9625e-01, -5.5203e-02,  3.4683e-03,  1.4460e-01,
         9.7916e-02,  1.9859e-01, -8.7379e-02,  2.1696e-01, -2.7750e-01,
        -6.1305e-01, -1.8928e-01, -3.9607e-01,  3.0817e-01, -5.1695e-02,
         9.6644e-02,  1.5375e-01,  1.6420e-01,  5.7360e-02, -6.0617e-01,
         5.9587e-02,  8.0530e-02, -6.4926e-02,  2.1824e-01,  2.2093e-02,
        -1.9076e-02,  2.8082e-01, -9.4354e-02,  1.8212e-01, -1.6113e-01,
        -1.3597e-03,  7.3751e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7508e-02, -1.1420e+01, -1.4090e-02,  1.0556e-01,  1.6009e-01,
         1.4757e-01,  2.4069e-02,  5.8641e-02,  6.1875e-02,  6.0869e-03,
        -2.2916e-02, -3.8502e-02, -1.2708e-02, -1.5992e-02,  1.3501e-04,
         1.0363e-02, -1.9930e-02, -3.2067e-02, -2.4534e-02,  1.2258e-02,
        -2.2954e-02, -4.5166e-02, -2.3460e-02, -2.2337e-02, -2.9737e-02,
         6.0854e-02, -3.4509e-02, -2.3514e-02,  2.0652e-03, -5.6984e-02,
         6.1393e-06, -3.3522e-02, -5.3979e-03, -6.3174e-02, -1.4428e-02,
        -4.0822e-02,  1.1596e-03, -4.6807e-02, -2.6228e-03,  3.0691e-02,
        -7.1092e-02,  7.7662e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7329e-03,  2.4161e+01,  6.1713e-01, -2.8791e-01, -1.7087e-01,
         5.2284e-02,  8.8276e-01,  2.3062e-02,  2.1131e-02,  1.9768e-01,
        -6.2451e-03,  2.2527e-01,  1.1114e-01,  9.4570e-02,  2.1085e-01,
        -2.3144e-02,  4.7214e-01, -2.6753e-03,  7.4403e-02,  5.9632e-02,
        -1.7883e-01, -6.4292e-02, -2.7797e-02, -7.4674e-02, -3.1690e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2550e-02,  3.2314e+00, -9.0916e-02, -1.3843e-02,  9.2278e-03,
        -4.9765e-02,  6.3487e-02, -1.4290e-02, -3.3378e-02, -1.1077e-02,
        -4.3987e-03, -1.0517e-03,  2.0039e-02, -5.0707e-03, -1.0988e-02,
        -1.1805e-02,  7.1984e-03,  4.4834e-02, -2.1459e-02, -1.0154e-02,
        -1.6758e-03,  4.8294e-02,  1.6308e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1142e-02, -1.9770e+00, -1.7497e-01, -2.4101e-02,  3.7095e-02,
        -7.4906e-03, -2.2408e-02,  4.3697e-03,  1.5707e-03, -5.7127e-02,
        -7.2807e-03,  1.6564e-03,  1.2932e-03, -7.8279e-03, -6.9692e-03,
         5.0646e-03, -1.0992e-02,  1.6146e-02,  2.4572e-03, -1.3446e-03,
         1.9560e-02, -1.4980e-02,  4.0598e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5027e+00,  7.1421e+01,  3.1527e-01, -2.2782e+00,  4.2678e-02,
         1.3809e-01,  2.2250e+00,  2.3981e+00,  1.3184e+00,  4.6458e-01,
        -7.1089e-01, -2.4905e+00,  1.0959e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.4596, 147.3595,  -0.6770,  -2.5226,   0.9452,  -0.1650,  -0.3744,
          2.1377,   4.3038,   0.8949,   0.7752,   0.2769,  -0.6584,  -2.3198,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5607e-01,  1.0920e+02, -2.8093e+00, -3.9210e-01,  1.0211e+00,
         3.1881e-01,  5.3724e-01,  2.2502e-01,  1.2327e-01,  1.2770e-01,
        -1.5679e-01,  4.9412e-01,  9.7578e-01,  8.2203e-02, -1.6667e+00,
         1.6515e-01,  1.9139e-01,  2.2102e+00,  9.5909e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.5523e-01,  5.7966e+00, -6.8515e-03, -1.0394e-01, -7.0736e-02,
         5.6108e-02,  2.0193e-02, -2.8176e-02, -2.4502e-02,  2.8777e-02,
         1.2440e-02, -4.3204e-02, -3.3159e-03,  4.3736e-02, -1.2491e-02,
        -1.4615e-02,  1.3649e-02,  4.1650e-02,  5.3622e-03,  7.5618e-02,
        -3.6836e-02,  3.3980e-02,  3.5208e-03,  1.4761e-02, -1.0986e-01,
        -8.0152e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1646e+00,  4.2991e+01, -5.2579e-02, -8.5592e-01, -5.8433e-01,
        -2.3152e-01, -4.3488e-01, -6.4445e-01,  3.0951e-02,  8.8956e-01,
         2.8984e-01,  9.2596e-02, -5.1604e-01,  3.0406e-01,  3.7617e-02,
        -3.6498e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1883e+00,  2.3553e+01,  5.6273e-01,  4.7858e-02, -2.5078e-01,
         1.1561e-01,  3.0909e-01,  1.1289e-01,  4.5418e-01,  3.5806e-02,
         1.6630e-01, -5.6383e-01,  1.0288e-01, -3.5750e-03,  2.6455e-01,
         1.7558e-01, -1.3428e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9529, 85.1404,  3.2265, -0.5088,  0.3694, -0.4701,  0.2712,  0.4950,
        -0.3606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5899e+00,  2.9225e+01,  5.2643e-01, -1.9842e-01, -5.5854e-01,
         3.0603e-01,  1.9432e-01,  3.4879e-02, -1.5688e-02, -1.3572e-01,
         1.5711e-01, -1.2915e-01, -6.7240e-02, -1.2053e-01,  7.5137e-02,
         2.3336e-01, -1.3933e-01, -5.2667e-02,  1.0954e-01,  8.7408e-02,
        -9.3612e-03,  9.6066e-03, -2.8405e-01, -7.9918e-02,  1.1183e-01,
        -3.8076e-01,  2.5209e-02, -1.2646e-01,  1.2174e-01, -1.1655e-01,
         1.7071e-01, -8.8485e-02,  1.8336e-01, -2.2324e-02,  1.3116e-01,
         3.6040e-02,  1.0292e-01, -1.1910e-01, -4.6945e-01, -5.2451e-02,
        -3.4813e-01, -6.7073e-01, -4.3321e-01, -2.5693e-01,  2.0510e-01,
         1.0182e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3792e+00, -1.0011e+02,  2.1871e+00,  1.8089e+00,  8.8131e-01,
        -5.6602e-01,  2.5343e-01, -9.1024e-02,  6.6695e-01, -5.5058e-01,
         1.2409e-01, -1.8050e-01, -2.6304e+00, -3.6808e-01,  5.1168e-02,
        -5.8260e-01, -2.3178e-01, -1.2455e+00,  5.0207e-01, -1.1666e+00,
         1.2749e+00, -2.5332e-01, -3.7126e-01,  4.5045e-01,  4.8497e-01,
         8.1544e-01,  8.6633e-01,  3.8863e-01, -5.1809e-01, -4.0011e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -4.2437, -119.9034,    1.4122,   -0.6206,    0.6113,   -0.9860,
          -1.1798,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.8597, 140.1366,  -0.9873,  -1.2474,   1.5096,  -2.5714,   0.6302,
          0.6131,  -3.9909,   0.3929,  -0.9472,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9205e-02,  1.1653e+01,  3.8661e-01,  9.4067e-02, -4.4915e-02,
        -3.4127e-02,  4.0789e-02,  2.4936e-02,  1.2917e-01, -5.8901e-04,
        -2.8299e-02,  6.1426e-02, -1.3921e-02, -1.8752e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2389e+00, -9.2969e+01, -1.7884e+00,  1.9880e-01, -8.0996e-01,
         4.9769e-01, -6.4662e-01,  1.8489e+00,  1.3720e+00,  1.5240e-01,
        -1.6140e-01,  3.7216e-02,  2.7663e-01, -2.1914e-01,  1.5762e-01,
        -6.0398e-01, -1.8441e-01, -8.3524e-01,  1.0531e+00, -9.6980e-01,
        -3.0561e+00, -2.2380e+00, -4.8005e-03,  2.4310e-01,  1.8222e-01,
        -2.0347e-01,  2.0053e-01, -5.4576e-01, -2.2620e-01, -3.5522e-02,
        -6.1793e-02,  1.2717e-01,  7.6903e-01, -1.5955e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6493e-01,  3.5506e+01,  2.2694e+00, -7.6448e-03, -7.6813e-02,
        -2.3012e-02,  8.7931e-02, -2.7631e-01, -1.5310e-01, -7.7599e-02,
         3.6726e-01, -2.1407e-01,  3.9052e-01, -1.3241e-01,  2.9817e-01,
        -1.7487e-01, -1.5291e-01,  7.8678e-01, -7.1718e-02,  8.8337e-02,
        -1.7198e-01, -2.4894e-01, -3.0231e-01, -4.7538e-02, -9.2244e-02,
        -1.3391e-01,  6.5424e-01,  2.4980e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3048e-01,  2.1574e+01,  2.5278e-01, -6.0339e-01, -1.8196e-02,
         3.9363e-01, -4.2707e-02,  1.1435e-01,  1.8103e-02, -6.6909e-02,
         3.7989e-02,  7.5480e-03, -2.8623e-01,  2.1595e-01, -1.2998e-01,
        -1.9085e-01,  1.7554e-01,  3.4510e-01,  2.6814e-01,  5.6440e-02,
        -6.6007e-02,  4.7200e-02,  1.8458e-02, -2.9994e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.2911e-01,  4.4045e+00,  2.3428e-01, -3.6628e-02,  5.8951e-02,
        -3.4150e-02,  9.5056e-03,  8.7939e-03, -3.0528e-02, -1.2836e-02,
        -3.5284e-02, -1.9867e-03, -2.1765e-02,  2.0820e-02,  1.3287e-02,
        -1.1985e-02, -7.4850e-03, -4.3469e-03,  3.0382e-02,  1.0237e-01,
         1.7617e-02,  1.2321e-03, -7.0717e-03, -1.5615e-02,  1.0535e-03,
         5.6451e-02, -4.6369e-02, -2.8814e-02,  6.0862e-04, -5.8113e-02,
        -8.9819e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  8.0841, 106.1531,  -3.0224,  -3.2534,  -2.6399,   0.1912,   1.0914,
          0.7041,   2.0407,   1.8620,   0.9233,   1.5013,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1437e-01,  1.2675e+02, -1.6623e-01,  8.7532e-01,  2.4677e+00,
         1.2924e+00,  6.1426e-01, -1.1709e-01, -8.6914e-01, -1.6332e+00,
         4.7975e-01, -2.8484e+00,  3.3223e+00, -8.4807e-01, -1.7342e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4208e-01,  4.2780e+01,  7.9975e-01, -1.9787e-01,  9.0899e-01,
         9.3761e-02,  1.1430e-01,  6.7285e-02,  2.4997e-02,  1.9839e-01,
        -1.2579e-01, -2.7811e-02, -3.0023e-01,  1.6465e-02,  7.7715e-02,
        -1.5162e-02,  1.3699e-01, -6.5399e-02,  2.2150e-01, -2.1809e-01,
         1.1986e-01,  9.4460e-02, -1.4281e-01,  8.1730e-02, -3.9475e-02,
        -1.0025e-01,  1.3748e-01, -2.8814e-02, -2.1615e-01,  2.6080e-01,
        -8.6260e-03, -7.5692e-01,  3.9703e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8390e-02, -2.5627e+00, -6.0661e-03,  5.0024e-02, -4.9132e-03,
         1.1880e-02,  3.6849e-02,  4.0256e-02,  3.0278e-02,  5.2326e-03,
         3.9783e-02,  1.0109e-02,  2.5227e-02, -6.3879e-04, -4.3392e-02,
        -2.8907e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4965e-03,  3.0255e+00, -4.0507e-02, -3.1144e-03, -7.7273e-02,
        -2.3050e-02, -2.2081e-02, -6.1954e-02, -2.5587e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1322, 23.7734,  0.1811, -0.2774, -0.6559,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2095e+00, -4.3145e+01, -2.3245e-01,  3.5055e-01, -6.1459e-03,
         2.4723e-01,  8.2826e-02, -5.3680e-03, -5.3195e-01, -9.0687e-01,
         1.3244e-01,  4.8205e-01,  3.2989e-01,  1.8697e-01, -4.9227e-02,
         2.7523e-01, -1.4419e-01,  1.2437e-01,  3.9774e-02, -1.4894e-01,
         7.4135e-02, -1.7774e-02, -2.3688e-01,  3.0624e-01,  6.3142e-02,
         4.2657e-01, -3.5752e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-15.2452, 124.6037,   1.6331,   1.7917,   3.2723,   1.9129,   0.6082,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6937e+00,  1.7740e+02, -4.8101e+00, -1.1160e+00, -1.0335e+00,
        -1.5233e+00, -8.5657e-01, -6.1263e-01, -2.9104e+00, -2.3747e+00,
        -5.7052e-02, -5.9717e-01,  4.5763e-01,  1.7460e+00, -1.7120e-01,
        -2.3062e-01, -8.1911e-03, -3.7667e-01,  1.4883e+00, -2.5278e+00,
        -1.4377e+00,  3.3355e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1822e-01, -1.2332e+01, -1.2524e+00, -1.7543e-01, -1.8142e-01,
        -9.0011e-02,  5.5199e-02, -2.7554e-01, -1.6125e-02,  1.9957e-01,
        -4.8868e-02, -5.5188e-02,  1.1066e-01, -1.8677e-01,  2.1603e-02,
        -2.3096e-02,  4.1931e-03, -7.5719e-02, -1.7676e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0160e+00,  5.2383e+01,  9.6448e-01, -7.8243e-01, -3.7095e-01,
         7.0557e-02, -6.6444e-01,  3.2111e-01,  3.9915e-01, -2.3633e-01,
        -2.2344e-01,  1.4444e-01,  1.9520e-02,  1.1708e-01, -9.3826e-02,
        -1.0516e-01,  4.2366e-02, -7.7064e-02, -1.3814e-02, -1.1973e-01,
         4.8088e-01,  4.7493e-01, -5.9880e-03, -2.3435e-02,  7.2563e-02,
         3.0713e-02, -2.5395e-01,  6.3586e-02,  2.7317e-01, -1.0296e-01,
        -8.3445e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.5816e+00, -9.1453e+01, -3.9971e+00,  6.5719e-02,  2.3630e-01,
         7.2244e-01,  6.3307e-01,  6.2183e-01,  7.7495e-01,  3.9531e-01,
         2.4328e-01,  2.3436e-01, -8.7558e-01, -1.5619e+00, -5.0782e-01,
         1.2825e-01, -1.0268e+00, -2.8173e-01, -8.3248e-01, -4.9928e-02,
         7.3664e-01,  5.7065e-01, -3.3779e-01,  4.2521e-01, -9.0113e-01,
        -3.0697e-02,  3.4540e-01, -6.9418e-01,  3.3727e-01, -7.6149e-01,
        -3.3898e-02, -9.1773e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5522e-02,  8.1368e+01, -1.7755e+00,  4.4421e-02, -1.2420e+00,
         4.5970e-01, -2.5498e-01,  1.7660e-01,  2.6999e+00,  9.1191e-02,
        -2.2564e-01,  8.4702e-01, -3.9650e-02, -8.5539e-01,  3.9285e-01,
         1.0206e+00,  2.3829e-01,  3.8755e-01, -2.6704e-01,  2.7086e-01,
         5.5558e-01, -1.4106e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3417e-02,  2.3487e+01, -4.5974e-01, -5.7109e-01, -1.1508e-02,
        -1.1804e-01, -3.6809e-01, -1.8416e-01, -5.9690e-02, -1.7530e-01,
         1.5924e-01,  1.3381e-01,  4.3119e-01, -5.0076e-03, -4.6873e-02,
         1.8406e-02, -8.2380e-02,  1.8145e-01, -4.0432e-02, -2.9698e-02,
         3.7395e-02, -5.9294e-02, -1.2394e-02, -7.4614e-02, -9.2892e-02,
         1.1225e-01, -5.7546e-02,  2.1958e-01, -4.7066e-01,  4.8430e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0837, 25.2517,  0.2501, -0.2330, -0.3684, -0.3681,  0.1450, -0.4122,
        -0.4191, -0.1471, -0.4248,  0.2298, -0.2999,  0.3112,  0.0896, -0.1245,
         0.4607, -0.0500,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3660e+00,  4.2659e+01,  4.3333e+00,  2.2783e-01, -5.9432e-01,
        -7.1011e-01, -2.5554e-01,  1.8311e-01, -3.7350e-02, -3.1570e-02,
        -6.2345e-02,  1.2330e-01, -2.8256e-03,  2.6034e-01,  4.3965e-02,
        -3.6897e-01, -8.6387e-03,  1.5188e-01, -1.4389e-01, -1.2053e-01,
        -5.5961e-02, -1.4960e-01,  7.5429e-02,  1.3682e-01, -1.1853e-01,
         5.4947e-02, -1.4816e-01, -5.3214e-02,  2.6459e-02, -3.6555e-02,
        -7.4981e-02,  2.1025e-01,  1.6422e-01, -2.5229e-01,  2.7028e-02,
         3.0156e-01,  2.4918e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7542e-01,  1.5772e+01, -4.0797e-01, -2.0442e-01,  1.0200e-01,
         8.9642e-02,  1.0942e-01, -9.6388e-02, -8.9899e-02, -6.1410e-02,
         2.1913e-02,  3.0267e-03,  5.2070e-02,  1.5765e-01,  1.2373e-01,
        -5.8178e-02,  5.2067e-02,  2.4913e-02, -1.4867e-02,  3.5028e-02,
        -1.4824e-01, -1.2421e-02,  1.9299e-02,  3.8038e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0911e+00,  3.9755e+01,  8.2119e-01,  1.9362e-01,  2.2224e-01,
        -1.2097e-01, -1.8146e-01, -2.5697e-02,  1.4179e-01, -2.3582e-01,
        -1.9354e-02, -1.3746e-01, -4.3415e-01,  4.3981e-02, -4.9235e-02,
         6.2106e-03, -6.6837e-02, -4.1084e-02,  3.7467e-02,  1.2481e-01,
         6.7610e-02, -3.5756e-01, -6.7483e-02,  4.9065e-02,  1.8021e-01,
         5.1770e-01, -6.9638e-02,  1.1955e-01,  4.3251e-02, -4.6624e-02,
        -3.5153e-01,  8.7918e-02,  2.9605e-02,  1.7107e-01,  1.5175e-01,
         5.8886e-02, -2.0990e-01, -4.5053e-03,  6.6952e-02, -8.8079e-02,
         1.7539e-01, -5.1241e-02, -1.1534e-01,  2.2461e-01,  1.0305e-01,
         5.2743e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2202e-02,  1.5159e+01, -5.8201e-01, -1.6260e-01, -1.1165e-02,
         3.3518e-02, -3.6261e-01,  5.7263e-02, -3.3180e-02, -2.4011e-01,
        -2.1272e-02, -1.4728e-01,  1.2944e-01, -9.9064e-02, -1.7611e-04,
        -8.2704e-02, -6.2835e-02, -3.0392e-01, -1.9779e-01, -2.5461e-02,
         2.0574e-01, -4.4813e-02,  1.5862e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2333e+00, -1.1325e+02,  2.9936e+00, -4.9755e-01, -1.3005e+00,
         4.5779e-02, -1.4791e+00, -1.8452e+00,  2.6650e+00, -3.0784e-01,
         4.0489e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6919, 19.9581,  0.7533,  0.0393, -0.1729, -0.1526,  0.0494, -0.1179,
         0.1762, -0.0650,  0.1384,  0.0369, -0.2920, -0.2561,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7105e-01,  1.7244e+01,  5.8741e-02, -2.5208e-01, -1.4743e-02,
        -3.8444e-02, -1.9431e-02,  1.6173e-01,  1.6998e-01,  6.3836e-01,
         2.9000e-01, -3.1472e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5007e-01,  5.6168e+01,  8.7011e-01,  2.9643e-01,  2.1739e-01,
        -3.7422e-01,  6.2431e-04,  5.3285e-01,  1.9200e-01,  3.1684e-01,
        -8.0283e-02, -4.4420e-01,  2.2724e-01,  1.0877e-01, -2.1219e-01,
         9.9970e-02, -9.1154e-02, -3.1100e-02, -6.4314e-03, -4.6839e-01,
         9.6046e-02, -4.4797e-02, -7.7818e-02, -4.5686e-02,  3.1247e-03,
         1.2079e-01,  1.2319e+00, -1.4751e-01, -2.8866e-01, -1.3128e-01,
        -9.8690e-03,  3.7411e-01, -3.9009e-02, -2.2901e-01,  1.0976e-01,
        -2.0440e-01,  1.4699e-01,  1.7894e-03,  1.1669e-02, -7.4709e-02,
         1.1288e-01,  6.9138e-02,  4.0033e-02,  5.4631e-02, -5.7519e-02,
        -2.4579e-01,  1.1937e-01, -4.2278e-02,  2.7350e-02, -3.0850e-01,
        -2.4415e-01, -2.7619e-02, -1.6323e-01,  2.7065e-01,  3.3239e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.9900e+00,  9.2275e+01, -2.5674e+00, -1.9759e+00, -2.7883e-01,
         1.4939e+00, -4.0047e-01, -1.6107e-01, -2.4785e+00,  1.0270e-01,
         5.6254e-01,  2.7547e-01,  7.1651e-01, -9.2771e-01,  1.1348e-01,
         4.8604e-01, -1.0635e+00,  7.6073e-01,  2.2431e-01,  6.2263e-01,
         7.6425e-01,  6.7935e-02, -3.7280e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7977, 47.9201, -1.1392,  0.4924,  0.4556,  0.2772,  0.2909,  0.2838,
        -0.3739, -0.6558, -0.0980,  0.2533, -0.3780,  0.3263,  0.2830, -0.1849,
        -0.2811,  0.3615,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5972e-01,  5.5906e+00,  2.8063e-01,  1.2715e-01,  1.6009e-01,
         8.0664e-02,  1.0857e-01,  4.2227e-02,  2.3856e-02, -1.0944e-01,
        -2.0270e-02, -5.4349e-03,  1.0991e-01, -6.8702e-02,  4.9628e-02,
         3.0847e-02, -1.1785e-02,  1.9522e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0570e-01,  2.5340e+01, -4.4112e-01,  7.9882e-02,  2.7471e-01,
         5.3521e-01,  5.1932e-02,  9.0785e-02, -1.7642e-02,  2.3986e-03,
        -1.0723e-01,  6.7170e-02,  1.8827e-01,  6.9482e-01, -1.1954e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3111e-03,  4.0129e+00, -6.5935e-02,  4.7731e-02, -1.7446e-02,
        -3.9500e-02, -2.4544e-03, -5.9027e-03, -3.5100e-02,  3.0510e-02,
         1.3383e-02, -3.0153e-03, -3.5153e-03, -6.9334e-03, -2.5257e-02,
         4.4805e-02, -1.5525e-02,  3.1101e-02,  1.1618e-02, -3.2072e-02,
         2.3289e-02,  3.1069e-02,  1.7460e-02, -1.3071e-03,  4.9367e-02,
         4.8374e-04,  7.5358e-02, -4.4720e-03, -1.3054e-02,  6.1137e-02,
         3.6508e-02,  1.0033e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4083e+00,  9.9477e+01,  2.3097e-01,  1.3835e+00,  2.0919e+00,
         3.5245e-01, -8.9542e-01,  8.6245e-02, -1.5125e+00, -1.6968e-02,
         3.0997e-01, -1.2654e+00, -1.2366e-01,  2.5497e-01, -1.8217e-01,
         6.5445e-01,  1.1763e-02, -2.3432e-01, -4.1067e-01, -6.3968e-02,
        -2.5902e-01,  8.9786e-02,  6.9811e-02, -3.2528e-01,  8.6828e-02,
         1.6148e-02, -4.1374e-01,  1.7243e-01, -5.2976e-01, -5.3189e-01,
        -1.7632e-01, -1.1857e-01,  1.2765e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6349e-01,  2.9551e+01, -1.1134e-01, -1.1241e-01, -1.3032e-01,
        -2.1100e-01,  2.0195e-02, -3.8358e-01,  7.4394e-03,  1.5536e-01,
         1.4924e-01,  1.9864e-01, -1.3358e-01, -2.7893e-02, -6.4041e-02,
        -1.0908e-01,  2.2289e-01, -2.9697e-02, -1.3569e-01,  4.4846e-02,
         7.9863e-02, -1.2941e-02,  9.1978e-02, -6.2869e-02,  6.3130e-02,
        -2.5780e-02, -2.2000e-02,  8.7442e-02,  6.7879e-02, -1.0254e-01,
         4.7398e-02,  1.2201e-01,  1.1905e-01,  9.8077e-02, -4.4299e-02,
        -9.7294e-02,  1.9204e-02,  7.2194e-03, -1.0648e-02,  6.3208e-02,
         2.1339e-02,  2.6206e-02,  4.5721e-02, -3.6957e-02,  4.8008e-02,
         4.9188e-02, -3.0018e-02, -4.6198e-02,  9.7817e-03, -6.3402e-02,
         2.2977e-02,  1.1081e-01, -1.1349e-03,  1.0472e-02,  1.4076e-02,
         1.3335e-01,  6.1954e-03, -3.6167e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3482e+00,  4.3207e+01,  1.2006e+00, -5.9052e-01, -1.1941e-01,
         7.3250e-02, -1.8231e-01,  4.3382e-02, -3.0338e-01,  1.9706e-01,
         1.2405e-02,  6.3013e-02, -1.2245e-01, -1.9361e-01, -4.8725e-02,
        -3.6002e-01,  1.5678e-01, -1.0369e-01, -1.5364e-01, -2.3812e-01,
        -1.0710e-01, -1.7746e-01, -3.2552e-01, -1.3040e-01,  1.6214e-02,
        -2.5355e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9062e-01,  4.7164e+00,  4.0347e-03, -6.6846e-02, -3.4111e-02,
         7.3413e-02, -2.8138e-02,  4.2281e-02,  3.5339e-02,  5.9574e-02,
        -9.9084e-04,  3.8801e-02,  2.4895e-02,  3.8260e-02,  5.3543e-03,
         1.0954e-02,  5.5886e-02,  2.0526e-02,  2.3945e-02,  6.3305e-03,
        -1.3519e-02, -4.3502e-02, -2.5761e-02, -3.6928e-02,  2.7212e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7156e-01, -5.5544e+01, -1.7949e+00,  4.2515e-01,  4.6629e-01,
         3.1129e-01, -4.2169e-01,  8.2795e-02,  7.4987e-02, -2.5039e-01,
         2.1125e-01, -1.0532e-01, -4.8055e-01, -8.9343e-01, -1.6007e-03,
         1.8944e-01,  3.4809e-01, -1.2390e-01, -7.8725e-02, -1.6562e-01,
         2.7005e-01,  8.9964e-01,  3.6315e-02,  1.2458e-01,  2.7217e-02,
         3.8791e-01, -2.2343e-01, -1.2622e-02, -2.8104e-02, -1.9881e-01,
        -2.1700e-01, -5.7750e-02, -1.6286e-01, -4.1876e-01, -4.5739e-01,
         4.5275e-01, -1.9286e-01, -9.0367e-02, -8.3210e-02, -1.5103e-01,
        -1.0887e-01,  1.3031e-01,  1.2258e-01, -3.7764e-01,  2.3846e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9159e-01,  7.4346e+00, -3.9321e-01,  1.2988e-01, -2.3839e-02,
        -1.4577e-01, -6.7548e-02,  8.0640e-03,  6.7318e-02,  7.1549e-02,
        -8.7206e-02, -3.6174e-02, -2.6942e-02, -1.8302e-02, -1.0191e-02,
         1.9328e-01,  9.2657e-03, -1.2611e-02, -1.1409e-02,  2.1246e-03,
        -1.2800e-02, -6.2335e-02, -3.3156e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0057,  1.4248, -0.0278, -0.0555, -0.0158,  0.0928,  0.0629, -0.0027,
         0.0034, -0.0031, -0.0589, -0.0090,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 3.0805e+00, -1.1449e+02, -1.7708e+00,  1.6499e+00,  2.7568e+00,
        -1.2754e+00, -8.8057e-02,  5.4125e-01, -3.7342e-01,  9.2139e-01,
         2.0287e+00, -4.2038e-03,  8.2217e-01,  7.6375e-01,  1.5922e+00,
         2.9954e-01, -4.8385e-01,  1.5367e+00,  4.9559e-01,  1.4202e+00,
         7.0792e-01,  5.3148e-01,  2.2701e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1860e+00,  4.5182e+01,  2.7403e-01, -4.5451e-01, -1.7234e-01,
        -2.9789e-02, -1.6524e-01,  4.2789e-01, -2.5697e-01,  4.4724e-01,
         7.4578e-02, -2.6605e-02,  1.2329e-01,  6.4052e-02,  1.4513e-01,
        -1.1707e-01, -2.3876e-01, -6.1610e-02,  2.9634e-02,  2.3757e-01,
         2.8152e-01, -1.8510e-01, -1.8161e-01, -2.9127e-01, -5.5506e-02,
        -1.6766e-02, -2.4118e-01,  1.0908e-01, -1.6061e-01,  1.4842e-01,
         9.6065e-02,  1.7643e-01,  3.5840e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5467e+00,  1.3697e+02,  1.5568e+00, -1.1493e-01, -1.4599e+00,
        -2.0183e-02, -4.0867e-01, -1.3978e-01,  3.7040e-01, -1.1492e-02,
         1.1355e+00, -1.4864e+00, -1.4455e+00,  8.8710e-01,  5.4704e-01,
        -2.6312e+00, -1.0216e+00, -2.5868e-01, -1.3623e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7182e-04,  1.7736e+00, -2.7899e-02,  7.0377e-02,  2.4328e-02,
         3.9420e-02,  8.7850e-03,  9.2209e-03,  1.3287e-02,  2.4627e-03,
         1.5530e-02, -6.3157e-03,  4.5442e-03,  8.8346e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1583e-01,  2.0149e+01,  8.9182e-01, -1.9062e-01, -3.3030e-02,
         3.8048e-02,  3.4855e-02, -3.7764e-01,  1.1152e-01,  3.0144e-02,
         1.6807e-02, -8.4872e-03,  4.4529e-02,  1.2595e-01,  3.5472e-02,
        -6.2474e-03,  8.2724e-02,  7.3132e-02,  9.3358e-02,  6.4137e-02,
        -1.4051e-01, -1.0134e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2090e+00,  1.6157e+02,  2.5982e-01, -9.8291e-01,  1.0129e+00,
        -1.6608e+00, -9.0185e-01, -1.0839e+00, -1.0471e+00,  4.4181e-01,
        -5.0074e-01, -1.0288e-01,  1.3574e+00, -7.7896e-01,  3.5171e-01,
        -1.1496e-01, -7.8320e-01,  2.3975e-01, -4.9550e-01, -7.7050e-01,
        -3.0003e-01, -7.4724e-01,  1.5909e-01, -9.3022e-01, -1.9350e+00,
         5.4900e-01, -6.0221e-01, -3.2610e-02, -2.2321e+00, -1.3881e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5394e-02,  8.5546e+00, -1.2642e-01, -7.7564e-02, -3.3053e-02,
        -6.6765e-02, -6.1158e-02,  3.5870e-02, -2.7394e-02, -9.3660e-02,
        -2.3539e-01, -1.3727e-01, -1.4258e-01,  1.3547e-01,  7.4239e-02,
        -7.4103e-04,  1.3525e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2550e+00,  6.0742e+01,  1.3467e+00, -1.0639e+00, -1.8055e+00,
        -2.6982e-01, -2.6888e-01,  1.7206e+00,  1.1307e+00, -1.3049e+00,
        -2.3716e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0396e+00,  7.0286e+01,  1.6628e+00, -2.3112e-01, -6.9645e-01,
        -2.2315e+00, -2.2971e+00, -8.5815e-01,  5.3289e-03,  8.1783e-01,
         1.5361e-01, -7.3545e-02, -1.2189e+00,  5.3381e+00,  6.1266e-01,
        -3.0669e-01,  5.6204e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3211e-02,  5.1626e+00, -4.4868e-03,  2.2134e-03, -5.2493e-02,
         2.5857e-02, -5.5331e-02, -2.8584e-02,  4.2698e-02, -3.3504e-02,
         6.2668e-02,  7.4653e-02, -3.4838e-02, -9.2648e-06, -5.9181e-02,
        -1.4930e-02, -3.3176e-02, -3.3343e-02, -6.6592e-02, -2.3587e-02,
         1.7813e-03, -1.3616e-02, -8.5229e-04, -3.1461e-02, -2.0186e-02,
        -5.3751e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5466e+00,  1.0957e+01,  6.0507e-01, -1.3132e-01, -8.0004e-02,
         9.1579e-02, -8.9269e-03, -1.0282e-02, -1.0225e-01, -9.8596e-02,
        -2.2621e-02, -7.2053e-02, -9.9455e-03,  3.6947e-02, -1.4012e-02,
         3.4489e-02,  2.2939e-02, -2.9343e-02,  2.7270e-03,  5.0548e-02,
        -4.6069e-02,  3.6458e-02, -5.6645e-02, -3.8109e-02, -5.9197e-02,
         2.2045e-02,  1.9708e-02, -6.2783e-02, -7.5023e-03,  2.2949e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9930e-01,  3.6605e+00,  3.7603e-02,  1.3732e-02,  5.2667e-02,
         2.9250e-02, -7.3835e-02, -1.2040e-02, -1.1455e-02, -9.7981e-03,
         2.0449e-02, -1.6371e-02, -1.9912e-03, -2.8687e-02,  1.4401e-02,
        -1.6161e-02, -1.1149e-02, -2.1357e-02,  6.5651e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.6303e-01,  8.7483e+00,  7.0885e-01,  4.6742e-01,  1.2350e-01,
         1.6047e-02,  7.1461e-02, -4.0671e-02,  5.1627e-02,  4.4397e-02,
         2.9521e-02, -2.2989e-03,  3.7806e-02,  7.4351e-02, -1.7185e-01,
        -1.1470e-01,  3.8252e-02,  6.9732e-02, -8.0543e-03,  6.3041e-02,
         2.5974e-02, -1.2400e-02,  5.1869e-02,  2.0935e-02, -3.4440e-02,
        -2.8691e-03,  9.7125e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6667e-01,  1.0720e+01, -6.8980e-02, -2.5884e-01, -5.5919e-02,
         1.6058e-01,  6.8691e-03,  1.0588e-01,  6.2702e-02, -1.2023e-01,
        -5.6997e-03,  4.0267e-01,  7.2848e-03, -1.7622e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5110e-01,  1.1124e+01,  1.4659e-01,  1.0088e-02, -2.5599e-01,
         2.3597e-01,  8.7230e-02,  2.9780e-03, -1.7641e-01, -3.6815e-02,
        -4.3018e-02, -4.1716e-02, -2.9329e-02, -2.4200e-02,  5.3787e-02,
        -5.5673e-04, -1.6707e-03,  1.7831e-02,  2.6728e-02, -2.7650e-02,
        -6.1130e-02,  4.4390e-02,  4.1080e-02, -8.2424e-02,  7.8155e-02,
        -2.8150e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9845, 82.9226,  0.6742,  0.2551,  0.5650, -3.0941,  0.9190,  1.2511,
         0.2151, -0.8466,  0.2716,  0.7826, -0.4095,  3.8347,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1165e-01,  1.6505e+01,  4.6521e-02, -1.6871e-01, -1.6384e-01,
        -5.5544e-02, -3.0668e-02, -2.0368e-01, -1.0825e-01,  2.0160e-02,
        -9.6600e-02, -5.5940e-02,  9.0135e-03,  6.1366e-02, -2.5399e-02,
         1.9578e-02,  8.1178e-02, -2.2785e-02, -2.5201e-02, -3.2317e-02,
        -4.9834e-03, -5.8418e-02,  1.2164e-01,  3.5981e-02,  1.8751e-01,
        -1.0589e-01,  9.4710e-04, -1.5704e-03,  3.8554e-02,  1.8356e-02,
         1.3471e-01,  3.8981e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4120e-01,  1.4930e+01,  2.5129e-01, -1.2954e-01, -1.6450e-01,
         2.2406e-01, -3.9054e-01,  1.0188e-02,  8.1220e-02, -5.0779e-02,
        -1.3590e-01,  7.9391e-02,  6.3293e-02,  6.0272e-02,  4.6092e-02,
         7.1114e-02,  5.7888e-02,  1.8262e-01,  8.3085e-02, -4.1444e-02,
        -3.4806e-02,  3.4293e-02, -9.4609e-02,  4.6919e-02, -1.9644e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0351,  0.6083, -0.0034,  0.0054, -0.0184,  0.0139,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4226e-01,  8.1900e+00,  1.4475e-01, -7.6713e-02,  6.3619e-02,
        -1.3210e-02,  1.1950e-02, -7.5200e-03,  2.1803e-02, -3.8330e-02,
         9.7888e-03,  1.5468e-01,  2.6202e-03,  6.9762e-02, -3.9982e-02,
        -3.3298e-02, -7.3120e-03,  7.1239e-02,  3.6393e-02,  2.7753e-02,
        -3.3272e-02,  2.5160e-02, -5.0533e-02,  1.7442e-02,  1.4859e-02,
         1.4848e-02, -9.1963e-03, -6.1489e-03, -1.4358e-02,  1.4497e-01,
         4.0319e-02, -2.0255e-04, -8.3475e-03, -3.4170e-02,  2.5716e-02,
        -1.8384e-02,  8.0866e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5925e-01, -2.2281e+01,  5.0919e-01, -4.0406e-02, -4.1639e-05,
         8.0287e-03,  5.3982e-02, -1.0247e-02,  9.0554e-04, -1.7200e-02,
        -9.4151e-02, -5.9129e-02,  8.6232e-02, -3.6667e-02, -6.3918e-02,
         1.6085e-01,  6.4162e-02, -2.3315e-01,  3.4710e-03, -1.3808e-01,
         1.2417e-01,  1.7084e-02,  1.0930e-01, -7.9251e-02, -7.2116e-02,
        -4.4875e-02,  7.8456e-02,  8.8836e-02,  7.6976e-02, -7.8233e-02,
         2.2093e-02, -3.7771e-02,  5.2250e-03, -3.4779e-02,  4.7429e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0515,  1.0414, -0.0969,  0.0086, -0.0046,  0.0130,  0.0256, -0.0042,
         0.0024,  0.0216,  0.0120, -0.0046, -0.0128, -0.0359, -0.0092,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3045e-01, -8.8316e+01, -7.2026e-02, -4.0975e-01,  1.2156e-01,
        -3.9322e-02, -1.5297e-01, -8.9193e-01, -1.4189e-01,  4.8732e-01,
         1.2336e-01, -1.5307e-01,  3.1627e-01,  8.8587e-02, -1.3979e-01,
        -8.5594e-01, -2.6061e-02,  5.0787e-01, -4.2766e-01,  1.1903e-01,
         6.8849e-01,  5.7709e-01,  2.5770e-01, -1.8444e-01, -1.4623e-01,
        -1.8551e-01, -1.3912e-01, -4.6001e-01,  3.8492e-01,  1.7377e-01,
         2.6695e-01,  1.7598e-01, -2.1296e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0062e+00,  1.1913e+02,  1.0260e+00, -5.7160e-01,  4.5189e-01,
        -2.8481e+00, -1.7570e+00,  6.0015e-01,  1.9269e-01,  7.1047e-02,
         4.8715e-01,  1.5853e-01, -8.4434e-01, -1.9730e+00, -2.4914e-01,
        -2.5545e-01, -9.7987e-02, -5.7683e-01,  6.2112e-01, -7.8831e-01,
        -1.2909e+00, -8.9287e-01,  5.7253e-01,  1.1278e-02,  1.8230e-01,
        -2.5375e-02, -2.0369e-01,  2.3581e+00,  1.3938e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.0176,  1.5551,  0.0103, -0.0267, -0.0025, -0.0219,  0.0065,  0.0447,
        -0.0109, -0.0377,  0.0042, -0.0065,  0.0070,  0.0103, -0.0596,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4363e-01,  2.7725e+01, -2.1265e-01, -4.6186e-01, -7.7698e-02,
         5.1882e-02, -9.6921e-02, -5.3239e-02,  1.1835e-01,  7.0810e-03,
         1.2289e-01, -9.7151e-02, -5.5226e-02, -3.0563e-02,  9.3142e-02,
        -1.2655e-01, -1.1162e-02,  5.5032e-02,  2.4344e-03, -5.9518e-03,
         6.7206e-02,  4.2776e-02, -3.3493e-02,  2.6201e-02, -2.3460e-02,
         4.7619e-02, -1.7924e-01,  1.2364e-02,  2.4396e-02, -7.8270e-02,
         3.4141e-02, -1.9095e-02, -8.5554e-02, -5.4610e-02,  2.1128e-02,
         1.1472e-01,  6.8288e-02,  1.5723e-02,  3.6326e-02, -9.5390e-02,
         6.2108e-05,  1.1770e-01,  3.3023e-02, -3.8171e-03,  4.1069e-02,
        -4.0400e-01, -3.3455e-02, -1.0900e-01,  2.7921e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0054e-01,  4.2944e+01, -1.3691e+00, -7.7948e-02,  4.6710e-01,
        -3.0101e-01,  3.2023e-01, -1.2212e-01,  4.5419e-01,  2.9826e-01,
        -1.5957e-01, -7.3432e-02, -9.3383e-02,  1.0578e-01, -1.3850e-01,
        -2.2891e-02,  1.2149e-01,  1.2737e-01,  9.8507e-02, -1.4691e-01,
        -1.3404e-02,  6.3243e-02, -1.2315e-01,  3.4853e-02, -1.0459e-01,
         9.1203e-02, -2.1120e-01,  2.3246e-01, -1.0529e-02, -1.7941e-01,
         1.7509e-01,  4.1309e-02, -1.5038e-01,  1.3840e+00, -9.9929e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4472e-01,  8.6676e+01, -1.4411e+00,  1.3324e+00, -2.9054e-01,
         1.2944e+00, -2.8569e-01, -3.9860e-01,  2.3426e-01,  2.0460e-01,
         2.2310e-01,  7.9123e-02,  5.3100e-02,  1.2238e-01,  3.2498e-01,
        -2.0148e-01, -4.8559e-02,  1.3971e+00,  1.6818e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  -4.1278, -143.3970,    2.6438,   -4.4980,    1.1976,   -0.6781,
          -0.8629,    0.4531,    0.3710,    0.4570,    1.6974,   -1.0302,
          -0.4747,    1.6811,    9.7014,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7949e-01,  1.8448e+01, -3.2315e-01, -1.4200e-01, -9.0221e-02,
         6.1379e-02, -2.5234e-03, -4.1805e-02,  3.1859e-01, -2.6599e-02,
        -2.3902e-02, -3.0135e-02, -1.1516e-01,  5.7896e-03,  7.9089e-02,
        -4.8502e-02, -1.1915e-01, -3.3849e-02, -9.5904e-04,  7.2236e-02,
        -6.8686e-02, -3.3756e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7739e-01,  8.4697e+00, -2.9679e-01, -1.4999e-01, -7.4724e-02,
        -1.3427e-01, -2.2938e-02,  5.4333e-02, -6.4162e-02,  1.8843e-02,
        -4.9767e-02,  5.9829e-02, -2.9600e-02, -4.1058e-02, -1.9682e-02,
        -1.9157e-02, -6.7779e-02, -1.9747e-02, -4.0339e-02, -1.9173e-02,
         1.7283e-03,  1.0408e-01, -5.8762e-02, -1.5018e-02,  1.2916e-01,
         1.2097e-02, -2.9247e-02,  9.0155e-03, -9.5058e-02, -8.9777e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2510, 21.4797, -0.9025, -0.2495, -1.4620, -0.3386, -0.1702,  0.5679,
         0.0836, -0.2159,  0.7022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3584e-01, -1.3644e+01,  1.2405e-01,  7.0868e-02, -1.2309e-01,
        -1.2133e-02, -6.2974e-02,  6.8892e-02,  1.5251e-02, -1.0695e-01,
         6.8437e-02, -2.0022e-02,  9.7171e-02, -2.2519e-02, -4.9075e-02,
        -6.4708e-02, -1.6912e-02,  4.6262e-02, -1.7871e-02,  2.2995e-02,
         3.9388e-01,  5.5304e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1662e-02,  1.1160e+00, -7.0561e-03, -2.8551e-02,  2.3455e-03,
         5.1001e-03, -4.3241e-03, -1.4652e-03, -6.6321e-03, -7.3074e-03,
        -2.6995e-02,  1.6525e-03, -4.9825e-03, -4.2232e-03, -4.2218e-03,
         8.7372e-04,  2.7855e-03, -6.4408e-03, -6.7148e-03, -1.1132e-03,
        -3.1437e-03, -2.0508e-04, -1.0455e-02, -3.8587e-03,  1.7430e-03,
        -3.4614e-03, -6.0799e-04,  7.4324e-05,  2.7328e-03,  1.7619e-04,
         1.3106e-02, -3.3510e-03,  1.6059e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1671e-01,  1.0350e+02,  1.7056e+00,  1.7874e+00, -7.2965e-02,
         5.9103e-02,  1.9045e-03, -1.4420e+00,  5.1367e-01, -8.5369e-01,
         2.6879e-01, -9.8445e-02, -6.8255e-02, -6.8158e-01, -1.0358e+00,
         1.0076e+00,  1.7979e-02,  5.8373e-01, -1.8233e-01, -5.5149e-01,
         5.8470e-01,  4.1195e-01,  3.0806e-01, -3.1346e-01, -4.1115e-01,
         5.3294e-01,  6.9375e-01,  1.1021e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6719e+00,  1.6343e+02,  2.7753e+01,  3.3296e-01,  1.1375e+00,
        -4.1786e-01, -1.9054e-01,  8.6660e-01, -1.4800e+00,  1.3258e+00,
         1.1975e+00,  2.3350e+00, -4.5036e-02,  3.3643e-01,  7.2967e-01,
         5.0992e-01,  1.4702e+00, -1.5640e-01,  5.5062e-01,  1.0967e+00,
         5.6648e-01,  2.4971e-02,  4.8200e-01,  2.1998e-01,  1.4276e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 3.9935e-01,  3.1284e+01, -1.2362e-01, -1.9578e-01, -2.6996e-01,
        -5.2372e-02,  7.9952e-03,  6.0090e-02, -6.2693e-02, -8.9810e-02,
         2.2564e-02,  1.1252e-01, -5.6811e-02, -7.9856e-02, -1.6868e-02,
        -9.3207e-01, -1.1992e-01,  6.8928e-02, -4.6281e-02,  8.6802e-02,
         1.4302e-01, -1.6360e-01,  1.3691e-01,  1.0578e-01,  5.1705e-02,
        -2.0121e-02, -1.4359e-01,  2.0893e-02, -1.2651e-01,  7.4322e-02,
        -3.1993e-02, -1.0612e-01,  7.3036e-02, -6.1239e-03,  7.3603e-02,
         9.8215e-02, -1.3619e-01, -2.5273e-02,  2.8548e-03,  1.0850e-01,
        -8.2066e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4586e+00, -1.1681e+02, -8.9753e-01,  1.9053e+00, -1.6298e+00,
         2.9695e-01,  4.4975e-01, -1.4590e+00,  2.9592e-01, -1.1384e-03,
         3.2707e-01,  7.9686e-01, -4.0043e-01,  4.4242e-01, -1.5534e+00,
         1.1223e+00,  5.6402e-02, -4.4964e-01,  3.8940e-01,  8.1182e-01,
         2.9498e-01,  1.7221e+00,  8.1295e-02,  1.2655e+00,  2.1495e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6014e+00,  6.9257e+01,  1.8550e+00, -1.3471e+00,  2.0077e-01,
         3.1700e-01, -6.2442e-01,  8.4461e-01, -5.7785e-01, -3.8582e-02,
        -7.2225e-02,  3.0943e-01, -4.3672e-01,  1.7395e-01, -1.9837e-02,
        -5.5807e-01, -5.2074e-01,  2.9535e-01, -7.9793e-01,  2.3495e-01,
         6.5803e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0921e-02,  4.9488e+00, -3.9941e-02, -5.9578e-02, -2.5388e-02,
         6.3360e-03, -3.8137e-02, -9.6524e-02, -9.0526e-03, -6.8168e-03,
        -5.0133e-02, -6.2038e-02, -2.8128e-03, -4.3272e-02, -5.4728e-02,
        -5.3617e-02,  1.1382e-01,  3.4838e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4780e-02,  9.0734e-01,  1.4295e-03, -3.7484e-03, -5.6308e-03,
         5.6016e-03,  2.3195e-03,  4.8169e-04, -2.2841e-03,  4.7503e-04,
         6.2267e-05,  1.2598e-03, -7.0929e-03,  3.3346e-03, -3.4398e-03,
         3.9621e-03,  1.5214e-03,  1.7432e-03, -7.2396e-05, -1.8634e-03,
        -3.2091e-03, -1.0011e-03, -1.3109e-03, -2.2031e-03, -8.3042e-03,
         1.5728e-03, -3.0430e-03, -5.4318e-03, -3.2239e-03, -9.0416e-04,
         8.4869e-04, -1.5224e-04, -1.6462e-03, -5.7341e-04,  1.4657e-03,
         4.0764e-03,  3.1370e-03, -2.5187e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5492e+00, -1.4724e+02,  2.8230e+00, -2.4920e+00, -4.5914e-01,
        -4.8755e-01,  2.0107e-03, -1.9335e-01, -5.3068e-01, -9.2591e-01,
         7.1061e-01, -3.6012e-01, -8.0503e-02, -8.7623e-01, -6.2603e-01,
        -1.9614e-01, -2.4522e-01,  5.2290e-01,  1.0829e+00,  3.1028e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6817e+00, -1.0441e+02,  4.4324e+00,  5.3891e-02, -9.0489e-01,
        -3.0362e+00, -1.8858e-01, -2.0220e+00, -9.0145e-01, -1.1826e+00,
        -1.6984e+00, -3.4249e-01,  1.2360e-01, -1.6074e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9992e-01,  3.6847e+00, -1.8927e-03,  2.9116e-03,  2.6643e-02,
         7.7254e-04, -4.0880e-03, -5.4922e-03,  8.7643e-04, -4.8245e-03,
        -9.0007e-03, -1.1278e-03, -1.2775e-02, -7.6373e-03,  1.2992e-02,
         1.4138e-02,  6.4775e-03,  2.7412e-02, -2.3197e-03, -1.4812e-04,
        -6.2225e-03,  7.3016e-02, -6.5060e-03,  2.2137e-02, -1.4036e-02,
         1.9653e-03,  4.1272e-03,  2.8280e-02, -2.1380e-02,  1.3537e-02,
         1.0212e-03, -2.1838e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7312e+00,  1.8964e+02,  2.5978e+00,  2.8938e-01,  1.8332e+00,
        -2.7769e+00,  1.7967e+00, -8.7573e-01, -1.4567e+00, -2.0192e+00,
        -1.2894e-01, -1.5431e+00, -5.5366e-01, -1.2196e-01,  2.4711e-01,
         2.6709e-01,  5.7722e-03, -1.1438e+00, -1.7678e+00, -5.2197e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5417e-01,  2.4741e+01,  2.2668e-01,  2.6173e-01, -6.3260e-02,
         2.4159e-02, -4.6487e-02, -5.5392e-02, -2.9259e-01, -1.0777e-01,
        -1.6833e-01, -1.2604e-01, -1.1420e-01, -9.3807e-02, -9.1590e-02,
        -1.6089e-02, -8.0919e-02, -2.2029e-01, -9.1541e-02, -1.0967e-01,
        -8.1039e-03,  5.1823e-02,  1.0205e-01,  6.5721e-02, -9.8513e-02,
        -1.5380e-01,  7.3475e-02, -2.6946e-01, -1.0749e-01,  1.9002e-01,
         1.2502e-01,  6.1991e-02,  8.7515e-02, -3.3666e-02,  1.4450e-02,
         2.3802e-01, -1.6514e-01, -5.3560e-02, -8.7976e-03, -6.2200e-02,
         3.5215e-02, -2.6994e-02,  2.0333e-02,  9.4794e-02, -4.5951e-03,
         1.9636e-01,  1.4846e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2792e+00,  5.8818e+01, -8.0275e-01,  1.6251e-01,  2.6610e+00,
         1.6597e-01, -1.7899e-01,  5.9998e-03,  5.3566e-02, -1.0270e+00,
        -4.6745e-01, -1.3970e-01,  1.3849e-01,  8.3647e-01, -3.6927e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9257e-02,  7.6462e+00, -1.1864e-01, -7.3886e-04,  6.5333e-02,
        -6.0752e-02, -3.7247e-02, -7.1507e-02, -2.0565e-02,  3.2329e-03,
        -1.0034e-02,  1.3958e-02,  1.0821e-02,  9.7763e-02,  1.6662e-02,
        -8.2856e-03,  3.6542e-03, -1.0354e-02,  5.5048e-03, -3.1610e-02,
         1.7775e-02, -8.4648e-03, -8.1754e-03,  2.6145e-02,  2.6464e-02,
        -4.2465e-02,  4.5442e-02, -6.0044e-02,  1.1689e-03,  4.5812e-02,
        -7.8724e-03, -3.4904e-02,  1.8573e-02,  1.0542e-02,  3.0111e-02,
         4.1657e-02, -2.2096e-02,  1.6856e-02, -2.2896e-02, -1.5894e-02,
         2.3029e-02, -1.1912e-02, -3.5709e-02, -9.8326e-03, -2.0462e-02,
         3.2602e-03,  5.6184e-02,  7.6807e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-3.2589e-01,  4.3408e+00,  8.1386e-02, -7.2560e-02,  8.4502e-02,
         1.2238e-02,  6.4052e-02, -2.5106e-02, -3.5928e-02,  2.6508e-04,
        -3.0167e-02, -7.3635e-02, -3.0145e-02, -4.1955e-03,  9.4189e-03,
         6.2784e-03, -4.6923e-02,  3.0490e-02,  3.4991e-03, -1.5341e-02,
         1.4514e-03,  3.5654e-02, -2.9664e-02,  5.6092e-02,  3.2925e-02,
         2.7047e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9371e-01,  1.2034e+02, -1.3599e+00, -6.9230e-02, -2.4140e-01,
         3.0818e-01,  5.0646e-01,  1.6104e+00,  8.4836e-02,  3.8776e-02,
         3.0818e+00, -1.2306e+00,  3.3372e-01,  1.8383e-01, -5.9817e-01,
        -4.2118e-01,  1.9140e+00, -1.7938e+00,  1.4721e+00, -3.9591e-01,
        -1.7582e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1227e-01,  6.8215e+00,  1.6028e-01,  6.2109e-02, -1.5976e-01,
         1.6712e-02,  2.1648e-03, -4.4616e-02,  2.6306e-02,  1.2169e-03,
         3.6493e-02,  1.4378e-02, -3.9686e-02, -1.9822e-03, -4.3822e-02,
        -2.3760e-02,  3.0653e-02, -5.4534e-02,  6.9483e-03,  2.8331e-03,
        -2.6698e-02, -5.2105e-03,  1.1797e-02,  4.1870e-03,  8.5944e-02,
        -8.2848e-03, -1.1121e-03,  2.8775e-02,  1.2225e-02,  9.6788e-03,
        -5.9039e-02,  2.9281e-02,  6.7628e-02,  1.3833e-02, -3.9701e-02,
         9.9943e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7397, 32.2850,  0.2182,  0.2886,  0.3710,  0.2374,  1.1609, -0.4072,
         0.1687, -0.1984,  0.1013, -0.3045, -0.3088, -0.3864, -0.0966,  0.1242,
         0.0658, -0.4869, -0.2565,  0.2882,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1907e+00, -8.1695e+01,  2.3494e-01, -6.4113e-01, -7.4513e-01,
         1.0056e-01,  2.4727e-03, -1.5367e-01, -1.9981e-01,  3.3066e-01,
        -5.1971e-02,  3.8437e-01,  6.2166e-01,  3.2676e-01, -5.8730e-01,
        -1.1828e-01,  9.4893e-01,  9.0996e-01,  1.4766e-01,  2.6994e-01,
        -4.7034e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1372e-01,  1.7277e+02,  4.6533e-01,  8.3827e-01,  8.4712e-02,
        -1.6214e-01, -4.6040e-01,  3.5642e-02,  7.5750e-01,  2.0063e+00,
        -1.7302e+00,  5.0600e+00,  3.3220e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6705e-02,  7.9871e+00,  8.6420e-02, -5.4727e-02, -2.8282e-02,
         4.0556e-03,  1.4086e-02, -2.2380e-02,  6.2501e-03, -3.0799e-02,
         3.9841e-02, -3.8266e-03,  1.2380e-02, -2.5947e-02,  9.8621e-02,
        -2.1769e-02, -4.9861e-02,  6.2412e-02, -2.2563e-02, -2.1029e-02,
        -2.5634e-02, -3.8485e-02, -1.3194e-02, -3.6508e-02,  1.5468e-02,
         4.5677e-03, -4.9332e-02, -1.1594e-02,  1.3426e-02,  1.9369e-02,
         1.4478e-02, -1.7320e-02,  2.1209e-02, -3.5446e-02,  4.8739e-02,
         8.8466e-02,  8.0529e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4276e-02,  4.4480e+00, -7.6777e-03, -6.5919e-02,  4.3956e-02,
        -1.6837e-02, -1.4166e-02, -5.8263e-02,  5.3980e-03,  2.9924e-02,
        -1.2557e-03,  5.6635e-03,  3.8483e-02, -1.5422e-03, -1.7723e-02,
         1.7135e-02,  2.1557e-02,  1.1096e-02, -1.6912e-02, -3.8399e-02,
         1.7983e-02,  2.4899e-02, -3.5570e-02, -2.9580e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1301e+01, -9.5992e+01,  1.2316e+00,  1.0586e+00,  1.7591e+00,
        -7.9295e-01, -1.3597e+00, -3.5575e-01, -1.2100e+00,  3.2039e-01,
         7.5208e-01, -3.9711e-01,  2.0910e+00,  2.5360e-01, -1.4782e+00,
         1.1532e-01,  7.4728e-01, -7.1205e-02, -1.8790e-01, -3.4139e-01,
         1.6363e-01,  3.5133e-02,  1.5847e-02, -9.9668e-02, -2.4101e-01,
         1.1833e-01, -3.6103e-02,  3.1559e-01,  1.8104e-01,  4.5921e-03,
         3.5203e-01, -1.2004e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1260e-02,  1.0708e+01,  1.4272e-01, -2.5093e-02, -1.0001e-01,
         3.0569e-02,  9.0853e-03, -8.4731e-02,  1.1435e-01, -2.0841e-02,
        -4.0065e-02, -7.3270e-03, -3.7580e-02, -1.3934e-02, -1.9251e-02,
         1.6504e-01, -3.5024e-02, -1.2109e-02,  7.7268e-02,  7.2015e-02,
         1.3383e-01, -1.2190e-02,  3.9700e-02, -7.5870e-03, -5.4268e-02,
        -8.0325e-02,  4.8281e-02, -3.3752e-02,  2.0014e-01, -1.3267e-01,
         3.2877e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2447e-01,  1.6149e+01,  8.5688e-01,  5.0809e-02,  1.5585e-01,
        -7.4243e-02, -3.1967e-02, -1.9927e-01,  1.4936e-01,  7.1457e-04,
        -3.9768e-02, -7.9160e-04,  9.6119e-02, -1.5346e-01,  2.0444e-02,
        -1.0610e-02,  9.1583e-02,  2.5065e-02,  3.1423e-02,  2.8331e-02,
         1.5596e-01,  2.9590e-01,  1.4576e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0159e-02,  2.0141e+00,  3.9628e-02, -1.6699e-02,  1.8229e-02,
         1.0153e-02, -7.8693e-03, -5.4772e-03, -1.6651e-02, -9.0486e-03,
        -3.9774e-04,  1.3281e-03, -7.4923e-03, -4.7303e-02, -9.0224e-03,
         9.0514e-03, -5.7799e-03, -3.1560e-05,  1.0454e-03,  7.5155e-03,
        -1.5631e-02, -6.1437e-03,  1.0240e-02, -5.4074e-03,  1.7046e-03,
         4.8168e-03,  3.6558e-02, -1.0079e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.1612e+00, -1.8544e+02,  9.3264e-01, -2.9258e+00, -5.2763e-01,
        -4.3081e-01,  4.3370e-02,  2.8175e-01,  2.0063e+00,  1.3022e+00,
        -8.7144e-01,  1.1135e+00,  4.0189e-01, -1.7529e-02,  4.7903e-01,
        -7.5169e-01,  2.0520e-01, -7.9943e-02,  1.6673e+00,  1.3488e+00,
        -1.0637e-01, -2.5133e-01,  1.0045e+00,  2.1375e-01, -2.0461e-01,
        -5.6878e-01, -5.8095e-01,  5.8056e-01,  1.0111e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2926e+00, -9.3562e+01,  2.2408e+00, -6.0462e-01, -3.9140e-01,
         9.1986e-01, -1.7542e-01,  7.7821e-01,  4.9108e-01, -6.8941e-02,
        -4.8408e-01,  9.9803e-01,  5.6083e-01,  1.5506e-02,  1.2821e-01,
         6.5488e-02, -2.6859e-01, -1.6870e-01,  1.5075e-01,  3.5187e-01,
         2.1075e-01,  1.0526e-01,  1.1306e+00, -2.4455e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7735e-01, -2.1178e+01, -7.0294e-01, -1.2374e-01, -1.8576e-01,
        -2.2386e-01,  1.9714e-02, -7.2783e-02, -1.8930e-01,  9.3460e-03,
        -1.0563e-01, -4.7722e-01, -1.1661e-01,  2.5244e-03, -1.4663e-02,
         8.9998e-02,  2.1664e-02, -3.2028e-03,  1.3621e-01, -7.8354e-02,
        -2.0645e-01,  1.2266e-01,  2.8011e-02, -1.3152e-01, -4.2992e-02,
         8.5570e-02,  2.5889e-01, -2.3727e-02,  1.0695e-01, -7.1084e-02,
         3.5185e-02,  2.4839e-02,  3.0793e-03,  6.7143e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8295e+00, -1.1377e+02,  5.8152e-01,  3.2017e-01, -3.9904e-01,
        -5.8461e-02, -1.8562e+00, -3.2191e-01, -6.6637e-02,  1.2566e-01,
        -3.8649e-01,  1.1397e+00, -1.3145e-01,  5.2383e-01,  3.9381e-01,
        -1.0212e-02, -5.6131e-02, -1.0962e-01,  6.2080e-01,  2.4932e-01,
        -2.0952e-01, -8.8251e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1078e+00, -1.1113e+02,  1.9373e+00, -4.1693e+00, -4.9776e+00,
        -5.6071e-01, -5.9364e-02,  8.8772e-01,  6.0889e-01, -4.3576e-01,
        -9.3001e-01,  1.1622e+00, -5.4218e-01, -1.0357e+00,  5.6876e-01,
         1.6068e+00, -3.5500e-01, -1.9651e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9012e+00, -1.4999e+02,  3.1793e+00, -4.1237e+00,  8.6065e-01,
         1.4559e+00, -1.8080e-01,  7.9272e-01,  1.1937e+00, -1.4054e+00,
         2.7553e-02, -5.4205e-01,  8.2350e-01, -1.5481e-01, -5.6928e-01,
        -1.0850e-01, -4.1616e-01,  1.3179e+00,  4.5345e-01,  3.2591e-01,
         8.4382e-01, -9.8096e-01,  1.2508e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8247e+00,  1.7074e+02,  2.6242e+00,  1.0159e+00, -2.2276e+00,
         2.6153e-01,  1.2804e-01, -6.5684e-01,  6.4968e-03,  8.1418e-01,
        -2.8328e+00, -2.2489e-01, -4.6742e-01,  1.7494e+00, -1.6513e-02,
        -1.2686e-01, -5.4304e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1916e-01, -1.2957e+01, -6.7450e-01, -4.7864e-02, -1.4565e-01,
         4.5610e-02, -3.9421e-03, -4.6424e-02, -4.7065e-02, -1.7735e-01,
        -1.1159e-01, -1.2654e-02, -8.0533e-03, -1.9734e-02, -3.9735e-02,
        -1.8696e-02, -3.1400e-01,  2.5121e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6895e+00, -9.7104e+01,  1.6539e+00,  6.6066e-01, -2.5521e-01,
        -3.3903e-01, -2.3416e-01, -2.7469e-01,  9.9205e-02,  6.8064e-01,
         5.1591e-02, -1.9982e-01,  7.0502e-02,  4.0181e-01,  3.0568e-01,
        -9.5700e-01,  9.5494e-01,  6.4759e-01, -3.6607e-01,  1.8150e-03,
         3.2346e-01, -4.4580e-02,  3.2203e-01,  6.2075e-01, -4.2558e-01,
         1.1603e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3559e-02,  6.7111e+00,  1.3375e-01, -5.9399e-02,  1.1333e-01,
         1.9267e-01, -7.6077e-02,  9.6743e-02,  5.1033e-02, -1.0706e-01,
        -1.6881e-02, -1.2874e-02, -3.5667e-02,  2.2590e-03, -5.8474e-02,
         7.0980e-03, -3.2456e-02,  3.1828e-03,  3.7993e-02,  7.2186e-03,
        -1.0882e-02,  1.0286e-02, -1.5757e-02,  4.1220e-02,  3.7802e-02,
        -4.1792e-02,  3.3765e-02, -1.7607e-02, -3.1273e-03,  6.0873e-02,
        -5.2511e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3923e+00, -1.2156e+02, -3.7424e+00, -8.3615e-02, -3.9967e-01,
         1.5317e+00, -1.0700e+00, -2.1753e+00, -1.1435e+00,  3.2539e+00,
        -9.9681e-01,  3.1313e-01, -1.1319e+00,  1.2961e-01,  5.3658e-01,
         9.8261e-01, -5.4414e-01,  1.4851e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8645, 20.3161, -1.0010, -0.2419,  0.0277, -0.1826, -0.3984,  1.6370,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-4.4245e+00, -1.3495e+02, -1.3986e+00,  1.1697e+00, -1.1379e-01,
         3.2160e-01,  7.3112e-01,  6.5544e-01, -8.9018e-01,  4.2179e-01,
        -4.0029e-01,  1.1244e+00,  1.2852e-01, -6.9461e-02,  6.9148e-02,
        -6.0885e-01, -3.6681e-01, -2.7850e-01, -1.4676e-01, -3.2615e-01,
        -5.2742e-01, -1.8530e-01,  9.8635e-02, -1.1510e-01,  5.7123e-01,
         2.3251e-01,  4.6921e-01, -4.2212e-01, -3.2737e-01, -2.8115e-01,
         9.6002e-02, -1.3787e-01, -3.2255e-02,  2.0341e-01,  9.6457e-02,
        -2.1667e-01, -4.2484e-01, -3.3938e-01,  2.8033e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2136e+00, -1.5878e+02,  2.0385e+00,  1.4728e+00, -9.8054e-01,
         7.6141e-01,  1.7283e+00,  2.4147e-01,  3.5783e-01,  8.1200e-01,
         2.3813e-01, -2.7566e-01, -9.6315e-01,  2.2953e-01, -5.9750e-01,
         1.1530e+00,  1.3418e+00,  2.8629e+00,  5.1853e-01, -1.3060e+00,
         3.0394e+00,  1.2032e+00,  2.1333e-02,  2.6084e-01, -2.3617e-01,
         1.1224e+00, -1.4313e+00, -1.2924e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1728e+00, -4.9237e+01,  4.0931e-01,  2.3858e-01,  1.2378e-02,
         1.5594e-01, -2.9944e-02,  6.2741e-01, -2.9589e-01,  5.9703e-01,
        -7.5904e-01,  8.7818e-01,  5.0601e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6787e+00, -9.3664e+01,  2.2665e+00,  3.6074e+00,  8.2719e-01,
         2.1990e-01,  1.1273e+00, -4.9346e-02,  4.8275e-01,  4.3069e-01,
         5.5261e-01,  5.5055e-01, -1.1339e-01,  6.5236e-01,  1.0044e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6270e-02, -7.1776e+00, -1.5198e-03, -4.7202e-02, -5.9755e-02,
        -9.5689e-02, -1.6858e-02,  4.7857e-03, -5.4372e-02, -9.0739e-03,
         3.9269e-03, -3.4849e-03,  3.3597e-02, -5.0449e-02, -2.6365e-02,
         2.1901e-02,  3.4857e-03, -8.2270e-03, -2.5373e-02,  4.6521e-02,
        -1.5738e-02, -3.0055e-03, -7.3814e-03, -1.0766e-02, -9.5294e-02,
        -6.1606e-02,  1.3845e-02, -3.5570e-02,  2.4989e-02,  7.7298e-04,
         2.9266e-02, -3.9269e-03, -7.5308e-03, -3.1393e-02, -8.1455e-02,
         1.0466e-02,  1.4360e-02,  1.8884e-02,  5.5794e-03,  3.6584e-03,
        -4.0315e-02,  1.7746e-02, -1.1399e-02,  1.0257e-02, -3.0892e-02,
         1.5153e-02, -5.2598e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5572e-01, -6.9567e+01,  8.1345e-01,  6.3280e-01, -1.7093e-01,
         2.4797e-01,  7.9822e-02, -1.2777e-01, -2.7714e-01, -1.6978e-01,
        -2.7970e-01,  7.8808e-02,  6.1997e-02,  1.5336e-01,  5.2420e-02,
        -2.8039e-01,  7.5854e-02, -2.8007e-02,  2.6115e-01, -8.0313e-02,
        -9.6721e-02, -4.2659e-02, -6.2487e-02, -2.5927e-01, -2.1124e-01,
        -1.2666e-01, -3.7866e-01, -2.0258e-01,  1.2432e-01, -2.5223e-01,
        -1.6014e-01,  1.9618e-02, -1.3448e-01, -2.3266e-01, -2.6890e-01,
         1.5082e-01,  2.7583e-02, -8.3400e-02,  2.9081e-02, -1.4608e-01,
         3.3267e-02,  3.7743e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0961e-01,  2.3340e+01, -3.9522e-02, -3.9298e-01,  3.7499e-01,
         1.6325e-01,  6.5137e-01, -1.3227e-01, -2.0157e-02, -1.8388e-01,
        -2.3199e-02, -1.1603e-01, -1.3887e-01,  5.2796e-02,  2.6822e-01,
         1.2108e-01,  6.8625e-01, -3.7626e-02,  3.3441e-02, -4.4708e-02,
        -1.5801e-01,  5.7058e-03, -1.9016e-01, -6.0278e-02,  3.5806e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4128e-01,  3.7489e+00,  7.2764e-02,  2.8448e-02,  1.9420e-03,
         1.5272e-03,  4.5553e-02, -1.3187e-02,  1.0752e-02, -9.4678e-03,
         5.3804e-04,  2.8783e-02,  2.3999e-02, -1.8670e-02, -1.6801e-02,
         1.9556e-03, -1.4369e-02,  7.7576e-03,  1.4114e-02,  1.2599e-02,
         6.6392e-02,  2.4546e-02, -2.3920e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6390e-01, -1.8160e+01, -4.8207e-01, -2.9246e-02,  2.2240e-01,
        -1.5519e-02, -5.4357e-02, -9.4910e-02,  1.9432e-01, -3.6902e-01,
         1.0735e-02,  5.1739e-03, -1.5014e-01,  1.2445e-01,  1.4771e-01,
         1.2154e-02, -1.2071e-02, -7.1712e-02, -5.6512e-03,  3.8723e-02,
         1.8503e-02,  5.6642e-02,  3.6529e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6224, 20.8174,  0.3700, -0.3007, -0.1825, -0.3473,  0.3994,  0.5206,
        -0.0575,  0.0763,  0.3987,  0.1122, -0.2087,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1013e-01, -3.9716e+01, -1.1681e+00,  6.8058e-01, -7.5484e-02,
         1.4453e-01,  3.2562e-01, -8.5302e-02,  1.7566e-01,  2.9197e-01,
        -1.1365e-01, -2.8042e-02,  5.9988e-01,  8.2845e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5768e+00,  2.0128e+01, -3.0751e-02, -3.3137e-01,  3.7422e-02,
        -1.2957e-02, -1.0906e-01, -9.1069e-03, -8.2989e-02, -1.4918e-01,
         1.1168e-01,  1.1284e-01, -4.5682e-02,  9.9636e-02, -6.1137e-03,
        -1.4852e-02, -2.3549e-01, -2.8240e-02,  5.2790e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 5.5957e+00, -1.5153e+02, -7.6332e-01,  1.6070e+00, -1.4449e+00,
         7.8544e-01, -1.3164e-01,  1.8869e+00, -4.2664e-02, -3.7923e-01,
        -4.7378e-01, -1.1298e+00,  1.1508e-01, -1.7249e+00, -2.6920e-01,
         3.9758e+00, -6.7001e-01, -2.8542e-01, -6.8901e-01, -6.7321e-01,
        -5.6877e-03,  1.4648e-01,  7.1127e-01,  2.2087e-03, -2.9916e-01,
         1.3162e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8798e-01,  2.2735e+00,  3.3763e-02,  2.8512e-02, -1.8493e-02,
        -3.2487e-02, -4.2728e-02,  6.0450e-03, -4.4482e-02, -7.0546e-02,
         2.1238e-03, -7.0098e-04,  1.0329e-02,  2.5937e-02, -1.9412e-02,
         3.9405e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7553e+00, -6.9163e+01, -1.2006e+00, -2.7082e+00,  2.2166e-01,
        -6.4573e-01, -1.0610e-01,  1.0149e-02, -3.2600e-01, -6.9783e-01,
         4.8743e-02, -9.9715e-03, -3.7167e-01,  1.4912e-02,  1.3141e-01,
        -6.4554e-01,  7.8495e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -2.4541, 124.8221,  -0.6710,   0.6540,   0.2001,   0.2443,  -1.7382,
          2.8226,   1.8404,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8172e-01, -3.7645e+01, -1.2642e+00,  4.3294e-01, -8.5741e-02,
        -9.4642e-01, -4.9121e-02,  1.3074e-01, -2.0997e-01,  2.4411e-01,
        -3.8935e-01,  8.5674e-02,  8.4195e-03,  5.2256e-02,  8.7940e-02,
        -1.1444e-01,  5.8782e-02,  5.0608e-02, -1.1906e-01, -2.0408e-01,
        -5.2976e-03,  1.9146e-02,  8.3964e-02,  2.3722e-01,  1.5046e-01,
         1.6292e-01,  5.2127e-01,  1.0607e-01, -7.2734e-02,  2.0078e-02,
        -2.0690e-01, -7.7024e-02, -3.2552e-02,  1.3357e-01, -1.2077e-01,
         3.9855e-01,  1.7558e-01,  2.7833e-02,  3.2472e-01,  1.0990e-01,
         5.6974e-02, -5.4718e-02,  6.1114e-01,  2.8863e-01,  1.6759e-01,
        -7.6855e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1895e-01,  1.7441e+01,  5.0531e-01, -1.6739e-02,  3.7226e-03,
        -9.4214e-03, -2.5267e-02,  2.3219e-02, -1.3009e-01,  1.2891e-01,
         2.5850e-02,  4.9633e-03, -3.6920e-02, -2.3343e-02,  1.2752e-01,
         6.7200e-02,  2.2574e-02,  4.5683e-02, -5.9907e-02,  5.7558e-02,
        -3.1956e-02,  2.6357e-03, -3.6127e-03,  2.9182e-02, -8.7526e-02,
        -1.2478e-02,  1.5717e-02, -9.2707e-03, -1.8960e-01, -1.4923e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2440, -6.1239,  0.1519,  0.1501,  0.0669,  0.2569,  0.0445,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6660, 43.1494,  0.0974, -0.2168,  0.4111,  0.2661,  0.6253,  0.2501,
         0.2304, -0.0814, -1.1251,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0023e+00,  9.9280e+01,  1.3312e+00,  4.5371e+00,  1.3098e+00,
        -4.1814e-01,  1.0646e+00, -7.0582e-01,  7.4789e-01,  9.8426e-02,
        -6.8958e-01,  1.9395e-01, -7.3846e-01, -3.4318e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3341e-01,  1.1618e+02, -2.0326e+00, -7.8542e-01,  7.7425e-02,
        -7.7469e-01, -3.6830e-01, -1.8758e-01,  5.5020e-01, -9.1725e-02,
         2.0492e-01, -7.4227e-02, -3.0320e-01, -8.3633e-01,  3.0947e-01,
         9.3118e-02,  8.7022e-02, -7.6308e-02, -1.4618e-01,  1.1139e+00,
         1.7850e+00,  1.0666e+00,  2.4893e-01, -1.0597e-02, -2.3792e-01,
        -6.0901e-01,  8.3936e-03,  1.6976e-01, -3.4849e-01,  2.5025e-01,
        -3.0710e-01,  2.8920e-03, -6.9843e-01,  2.0643e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5004e-01,  7.0284e+01,  7.3483e-01,  2.8986e-01, -5.3056e-01,
        -2.3801e-01,  8.9918e-01, -1.9450e-01,  3.8918e-01, -5.0891e-02,
         5.5150e-01,  1.1230e-01, -2.7797e-01, -4.0675e-01,  1.4152e+00,
        -5.3758e-01, -3.8102e-01, -2.5862e+00, -1.3105e-01, -1.8813e-01,
         1.7456e-01, -1.7026e-01, -2.3174e-02, -9.2136e-02, -1.4004e-01,
        -4.9809e-01, -4.4358e-01, -1.8893e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3584e-02,  8.5394e+00, -1.2488e-02, -1.1749e-03,  3.0069e-03,
        -2.3295e-02, -6.5503e-02,  3.3488e-02, -2.6483e-03,  2.7135e-02,
        -2.3096e-02, -9.1529e-02,  2.5621e-02,  4.5320e-02, -3.0942e-02,
         2.9961e-02,  3.2879e-02,  3.8642e-02,  3.6646e-02,  4.2436e-03,
         2.8047e-02,  2.0128e-02, -3.5261e-02, -1.6410e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-7.2015e-01,  1.3799e+02,  1.6024e+00,  1.1663e-03, -6.0365e-02,
        -1.4172e+00,  8.4369e-01, -3.5050e-01,  8.5339e-01, -9.5493e-02,
        -3.0694e+00, -1.0731e+00, -2.2033e-01,  4.7691e-01,  9.2095e-02,
         8.9521e-02, -1.7897e-01, -4.9554e-02,  1.3122e+00,  2.5866e+00,
         4.1619e-01,  1.0454e-01, -6.8039e-01,  3.3915e-01,  4.8145e-01,
         1.1383e+00,  5.2240e-01,  5.3277e-01,  6.3809e-01, -1.9047e+00,
        -4.4776e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.4641, 120.3037,  -2.1608,   5.5209,  -0.4231,  -0.3609,   0.4232,
          0.3151,   0.9487,   0.6332,  -0.7944,  -0.6078,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1177e-01,  4.7202e+01,  3.3683e-01,  1.9579e+00, -8.6621e-02,
        -1.4139e-02, -2.8929e-01, -1.2718e-01, -2.3148e-01,  6.7244e-02,
        -3.9371e-02, -1.1029e+00,  1.4259e-01, -1.7790e-01, -2.0615e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0230e-01, -2.3922e+01, -4.6057e-01,  3.6903e-01,  2.2701e-02,
        -1.3274e-02,  6.3347e-02,  1.2774e-02, -5.1591e-02,  7.4830e-02,
         2.2322e-02, -6.3092e-02,  1.4042e-02,  1.2191e-02,  1.1972e-01,
         9.8171e-02,  6.7873e-02,  5.8207e-02, -1.9840e-01,  9.9081e-02,
         8.5975e-02,  4.3402e-02, -8.0433e-02, -3.1759e-02, -1.3986e-02,
        -3.9280e-02, -8.9205e-02,  7.3562e-02,  8.5809e-02,  3.7516e-02,
         1.5563e-01,  2.2522e-01, -8.2812e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5786e-02, -2.0796e+00,  4.3850e-02,  2.0869e-02, -1.7436e-02,
         2.0013e-05,  1.5559e-02,  1.5270e-02,  4.3698e-03,  4.4374e-03,
        -1.1162e-02, -1.1575e-02, -4.9748e-03,  4.7928e-03,  1.0656e-02,
        -3.2157e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2346, 88.7664, -1.9087, -0.5392,  0.7206,  2.1112,  1.0183,  0.2616,
         1.0471,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -4.9366, -103.8344,    1.5502,   -4.7567,    4.7839,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3949e+00,  1.4248e+02,  1.2337e+00,  1.5898e-01,  8.0135e-01,
        -3.5302e-01, -1.0852e-01,  1.0431e+00,  2.1056e+00,  8.0404e-01,
         2.0438e+00, -7.1034e-01,  4.5744e-01, -1.0401e+00, -1.3426e+00,
        -3.2361e-02,  6.7581e-01, -2.4876e-01,  2.7918e-01,  7.2718e-01,
        -3.6639e-01, -1.2526e-01,  3.0895e-01, -4.7966e-01,  2.2055e-01,
        -1.8061e+00,  1.2863e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  17.1120, -130.4198,   -3.0390,   -2.3379,   -2.3429,   -0.9441,
           1.4739,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9376e-02,  4.8596e+01, -1.5138e-02, -6.8538e-01, -2.2525e-01,
         1.9910e-02,  4.1342e-01,  3.5635e-01, -5.0519e-01,  2.5963e-01,
        -1.2155e-01,  1.7649e-01, -7.4882e-02,  1.9262e-01,  4.2515e-01,
         1.8042e-01,  4.9371e-02, -3.8655e-01,  3.5697e-01, -2.1305e+00,
        -4.6958e-01,  6.2981e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3537e+00, -1.9761e+02,  1.3859e+00, -1.5206e+00,  2.8363e+00,
         3.7745e-02,  6.6991e-01, -1.1468e+00, -1.1791e-01,  1.5068e-01,
         2.1552e+00, -2.0256e+00,  3.2144e+00, -5.7060e-01,  9.6314e-01,
        -7.4818e-01, -1.8846e-01, -1.6887e-01, -1.5342e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8961e+00, -1.4395e+02, -1.6331e+00,  8.8858e-01,  1.8579e+00,
         1.8670e+00,  1.8822e+00, -4.1741e-01, -1.7652e+00,  1.4072e-01,
        -9.6673e-03, -5.3478e-01, -4.0398e-01, -7.2631e-01,  4.9961e-01,
        -5.8275e-01,  9.0042e-01, -3.7618e-01,  1.7337e-01, -4.1465e-01,
        -6.2386e-01, -8.2219e-01, -5.1405e-01,  7.1868e-02,  1.5267e-02,
        -5.4963e-01,  8.2987e-03,  1.8456e-01, -6.1049e-01,  8.7334e-01,
        -1.7600e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 4.4358e-01, -1.1194e+01, -7.9307e-02,  1.7998e-01, -1.5046e-01,
         1.2971e-01,  1.7478e-01, -2.1958e-02,  2.6018e-03, -3.8067e-03,
        -2.9208e-03, -7.7665e-02,  1.0381e-01, -2.1404e-01, -2.8519e-02,
        -9.7701e-02,  1.3117e-01,  3.5386e-02,  1.6541e-01, -4.2552e-02,
        -1.2549e-02,  1.9950e-02, -2.3250e-02,  6.2607e-02,  8.1713e-03,
         1.9281e-02,  4.4756e-02,  1.3529e-01, -3.6850e-02, -6.2660e-02,
        -7.7272e-02,  1.2048e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0697e-01, -3.1508e+01, -4.0061e-01, -2.0878e-01,  3.3797e-02,
         4.9735e-02,  6.0094e-02, -3.3704e-01,  1.9027e-02, -9.4952e-02,
         2.0732e-01, -2.7625e-01,  1.9453e-01, -3.3431e-01, -2.8563e-03,
        -1.0107e-01, -7.9598e-02, -3.1039e-02,  1.0584e-01, -1.3275e-02,
        -3.2669e-01, -1.0263e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7649e-01,  2.3438e+01,  3.3673e-01,  1.7407e-01, -1.7793e-02,
         5.9891e-02, -1.6971e-01, -1.6970e-01, -8.4696e-03, -3.3318e-02,
         2.7341e-01,  2.4337e-02,  1.9306e-01,  1.0374e-01, -1.4199e-01,
        -9.8808e-03, -1.4451e-01,  3.8587e-01, -2.1005e-01, -4.6420e-01,
         4.2517e-03, -1.2503e-01,  2.1856e-02, -1.7628e-01, -5.8310e-02,
        -1.6198e-01,  6.9516e-02, -1.1453e-02,  2.4485e-01,  5.5611e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0575,  3.8814,  0.1375, -0.0164, -0.0085, -0.1443,  0.0518, -0.0269,
        -0.0267,  0.0187,  0.0460,  0.0780, -0.0230,  0.0079,  0.0070,  0.0204,
         0.1626,  0.0076,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1605e+00,  1.8025e+01, -2.0889e-01, -1.5524e-01, -2.3869e-01,
        -5.4914e-01, -1.0910e-01,  2.9645e-02,  1.4910e-02,  2.8549e-02,
         7.4033e-02, -4.4021e-02, -2.2405e-03,  2.5200e-02,  2.9757e-02,
        -6.6450e-02,  1.5814e-01,  2.7460e-01, -3.1612e-01,  3.7227e-02,
        -4.7623e-02, -1.2742e-01,  1.6574e-01,  3.1614e-02,  1.0444e-01,
         6.8892e-03,  7.5295e-02,  5.1937e-02,  9.6941e-02,  8.9489e-02,
         6.3546e-02,  7.8716e-02,  9.2824e-02,  6.1550e-02,  5.9864e-03,
        -1.0683e-01,  1.5471e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6521e-01,  1.2801e+01, -8.5301e-01,  1.9961e-01,  4.6015e-02,
         4.3321e-02,  4.3398e-02, -4.7277e-02,  6.5611e-02,  4.2467e-02,
        -4.7881e-02, -9.9184e-02,  1.9866e-02,  7.8724e-03,  7.3324e-02,
        -4.2844e-02, -8.5612e-02, -3.4873e-03, -5.4029e-02,  1.5494e-02,
        -1.0625e-01, -2.6618e-03,  8.0519e-02, -2.1288e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2842e-01,  3.1483e+01, -5.0820e-01,  3.6502e-01,  6.7433e-01,
         3.5857e-01, -6.7932e-02,  5.2183e-03,  2.3503e-01, -1.6579e-01,
         7.9148e-02,  3.9088e-03, -1.2244e-02, -5.2960e-02,  1.9866e-01,
         1.0180e-01, -4.2577e-02,  6.4082e-03,  7.5928e-02,  1.8631e-02,
         3.4696e-02, -5.0935e-02, -5.0048e-02,  1.0242e-01,  1.2959e-01,
         3.5105e-01,  7.4940e-02,  4.5043e-02,  9.0504e-02, -2.1092e-02,
         3.9258e-03, -3.3231e-02, -2.5805e-02,  3.7212e-02,  5.0653e-02,
        -1.7361e-01,  5.1147e-02, -6.1549e-03, -3.3492e-02,  8.9411e-02,
        -1.1492e-01,  5.1570e-02, -5.8593e-02,  3.1546e-01, -7.1798e-02,
        -1.0105e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2483e-01, -5.3327e+01,  9.9781e-02,  6.7711e-01,  5.0555e-01,
        -3.8866e-01,  1.1725e+00,  2.3344e-01,  2.4383e-01,  1.4635e+00,
         1.5284e-01,  1.5909e-01,  3.0092e-01,  5.1875e-02,  1.2332e-01,
        -4.6983e-02, -3.5107e-01, -1.1350e+00,  2.3674e-01, -2.2612e-01,
        -1.0780e+00,  3.7630e-01, -4.4357e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8648e-01, -1.4069e+01, -7.9321e-02, -2.1878e-01, -8.9730e-02,
        -4.9337e-02, -1.6594e-02, -4.0711e-02,  4.2599e-03,  3.6819e-01,
         5.1775e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3140e+00, -1.1418e+02, -4.2766e+00, -1.0209e+00,  8.5058e-01,
        -1.4045e+00, -9.5679e-01, -3.9418e-03, -5.5758e-01,  1.7547e-02,
        -1.7809e+00, -1.5583e+00, -1.4862e+00, -2.9653e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2741e-01,  2.6427e+01,  1.0254e+00, -6.8962e-01, -1.7996e-01,
         1.1561e-01, -2.4849e-01,  2.6299e-03,  2.1787e-01,  2.4171e-01,
        -3.9923e-01, -1.3298e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6672e+00, -6.4670e+01, -1.9937e+00, -2.2608e+00,  1.4691e-01,
         3.0511e-02,  1.8508e-01, -4.3627e-01, -3.6357e-01, -3.0236e-01,
        -1.1138e-01, -3.7610e-01, -2.6267e-02, -1.9870e-01,  2.3175e-01,
         1.1554e-01,  5.5414e-02, -1.6675e-01,  1.1817e-01,  3.0603e-01,
         7.0084e-02, -8.5387e-02, -3.1117e-01,  2.1733e-01, -1.1182e-01,
         3.4428e-01, -3.7568e-01,  2.5000e-01, -4.2209e-02,  1.6673e-01,
        -2.3572e-01, -4.2861e-01, -1.6870e-01, -3.1784e-01, -8.4411e-02,
         1.8168e-01, -7.4836e-02, -2.0621e-01,  1.4513e-01,  1.3901e-01,
        -1.3639e-01, -1.0023e-01,  2.2367e-02, -1.2035e-01, -8.4605e-02,
        -2.8034e-01, -1.1790e-01, -7.1037e-02,  1.3525e-02,  3.8866e-01,
         5.7642e-02,  8.3471e-02,  1.4979e-01,  3.4823e-04, -4.9411e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 5.0995e-01, -4.6529e+01, -1.1844e+00, -4.3536e-01, -6.8060e-02,
         1.9473e-03,  2.6891e-01,  5.6449e-01,  2.3267e-01,  3.2318e-01,
         3.3108e-01,  1.2573e-01, -2.4590e-01,  1.6386e-01,  2.8317e-02,
         1.1889e-01, -5.1515e-01, -1.0881e+00, -1.5007e-01,  1.4367e-02,
         1.4398e-01,  6.9992e-01, -1.8756e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3627, 10.8132,  0.1037, -0.2155,  0.1352,  0.1182,  0.0692,  0.1124,
        -0.0978,  0.0308, -0.0400,  0.0281, -0.1756, -0.0616, -0.0210, -0.0242,
        -0.2390,  0.1656,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.8151, -65.4598,  -0.6385,  -0.9054,  -1.1574,  -0.5982,   0.7913,
         -0.2974,  -0.7592,  -0.1568,  -0.2243,  -0.6625,   0.2007,   0.4056,
         -0.0850,  -0.4636,   0.6396,  -0.5897,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4359e+00,  5.9913e+01, -1.8352e+00, -8.1915e-01,  9.4721e-01,
         3.8741e-01,  4.1105e-02,  1.5537e-01,  1.0352e-01,  4.0838e-01,
        -3.4376e-01, -4.8470e-01,  5.1650e-01,  1.9284e-01,  4.4878e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5730e-01,  5.5848e+00,  3.2586e-02,  2.3056e-02, -2.8167e-02,
         7.3015e-03, -2.6452e-03,  8.5893e-02,  3.7934e-02, -5.3230e-03,
         1.6856e-02, -3.0354e-02,  1.5518e-02, -1.6540e-02, -1.2337e-02,
         3.1440e-02,  2.4664e-02, -1.5826e-02, -1.1064e-02,  2.2168e-02,
        -2.3793e-02, -1.2232e-02, -7.6683e-04,  4.4357e-03,  2.0583e-02,
        -1.2932e-02,  4.8130e-02, -1.8387e-02, -9.0672e-02,  1.4044e-01,
        -4.3960e-03, -6.9825e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3356e-01,  1.1011e+02, -2.6771e+00,  5.8317e-01,  5.8628e-01,
         4.6107e-01, -6.0436e-01, -7.4636e-02,  4.8304e-01,  2.8984e-01,
         3.2310e-01,  1.0001e-01,  2.5165e-01, -3.8596e-01,  2.1330e-01,
         4.5099e-01, -5.7121e-02, -1.0694e+00, -3.9363e-01, -4.2148e-01,
        -1.4279e-01, -1.3156e+00, -7.2632e-01, -5.4432e-01,  2.2922e-01,
        -4.3157e-02,  1.1528e-01,  1.8103e+00, -4.4993e-01,  3.0363e-01,
         4.4379e-01, -3.0024e-02,  6.2228e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8129e-01,  4.6851e+01,  2.4153e-01,  7.9948e-01,  4.0747e-03,
         8.3036e-02, -6.7935e-02, -4.0473e-01, -3.2011e-02,  2.3148e-01,
        -2.5250e-01, -6.4334e-01,  2.7152e-02, -5.6598e-02, -2.2271e-01,
         1.0263e-01,  2.4366e-01,  3.8886e-02,  1.7614e-01, -1.5351e-02,
        -6.1572e-02,  3.8238e-02,  2.3938e-02,  1.3819e-01,  1.6156e-01,
         6.0111e-02,  2.5816e-01,  2.8623e-01,  4.4576e-02, -8.8457e-02,
         3.2671e-01,  9.7751e-02,  2.6684e-01,  1.9719e-01,  3.9777e-02,
        -2.1030e-01,  7.2404e-03,  1.4010e-01,  1.4360e-01,  3.7502e-01,
        -2.7364e-02,  7.9997e-03, -5.4970e-03,  1.1981e-01,  2.6729e-01,
         8.9198e-02,  6.1908e-02, -6.9037e-02,  1.2053e-01, -8.6382e-02,
         2.0981e-01,  5.9452e-02,  1.1415e-01,  6.2784e-02,  2.7969e-02,
         2.7599e-01, -1.5645e-01,  9.9832e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2741e-01,  2.7610e+01, -3.2668e-01, -7.4514e-02, -2.4156e-02,
         1.3170e-01, -1.7601e-01, -1.3957e-01, -4.7934e-02, -6.8879e-03,
         6.3648e-02,  1.4161e-01, -1.8325e-01, -4.9710e-01,  8.4278e-02,
        -1.7358e-01,  2.5767e-01,  9.4165e-02, -1.6249e-03,  1.9298e-01,
        -5.4080e-03,  1.8363e-01,  1.3142e-01,  4.1915e-01, -1.1301e-01,
         1.2733e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7952e-01,  4.4847e+01, -7.5068e-01,  2.2358e-01,  2.4439e-01,
         8.9990e-01, -1.3025e-01, -1.1077e-01,  1.0086e-01,  2.2189e-01,
         9.9150e-03,  1.1049e-01,  1.1216e-01,  2.8819e-01,  1.4795e-03,
         1.0534e-01, -1.5333e-01, -4.7175e-02,  2.5111e-01, -4.0564e-01,
        -4.6831e-02, -8.9389e-02, -6.2280e-01, -4.9015e-01, -2.5800e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9301e-01,  1.4137e+02,  4.6266e+00, -7.3853e-01,  6.5062e-01,
        -7.5671e-01, -3.2836e-01, -3.8221e-01, -4.5771e-01, -6.6101e-02,
         1.0487e-01, -7.5548e-01, -4.6536e-01,  2.0272e+00,  3.0686e-02,
        -9.3384e-01, -7.7977e-01,  4.8760e-01,  6.4939e-01, -1.8473e-01,
         2.3905e-03, -3.6148e-01,  7.0876e-02,  1.6274e-01, -1.1043e+00,
        -9.9048e-01,  3.1931e-01,  1.4167e-01,  3.9944e-01, -7.4135e-01,
         4.8021e-01, -8.5299e-01, -9.0111e-01,  2.2569e-01,  1.4762e-01,
        -9.2005e-01,  4.4770e-01, -4.0740e-01, -2.8982e-01,  7.7811e-01,
        -5.0924e-02, -1.0875e-01,  8.4706e-01,  3.9758e-01, -3.2013e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0325,  2.8996, -0.0435, -0.0095,  0.0037, -0.0379, -0.0292, -0.0089,
        -0.0159, -0.0064,  0.0061, -0.0080, -0.0116,  0.0072,  0.0103,  0.0345,
        -0.0301, -0.0106,  0.0087,  0.0159,  0.0108, -0.0209,  0.0418,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0151,  0.1216, -0.0059, -0.0010,  0.0020,  0.0088,  0.0035,  0.0019,
        -0.0010, -0.0007,  0.0009,  0.0029,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.0086e+00, -1.2991e+02, -8.8128e-01,  1.9431e+00, -3.2023e-01,
        -1.1209e+00,  1.8775e+00,  8.5037e-01, -8.9519e-01, -6.5360e-02,
         2.8838e+00, -8.0263e-01,  1.3266e+00,  4.5972e-01,  1.9810e-01,
         9.4491e-01, -1.0243e+00,  8.0172e-01,  6.8199e-01,  1.0562e+00,
         2.0444e-02,  1.7568e+00, -8.0786e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6416e-01,  1.5075e+01,  3.1509e-01, -1.5737e-01,  7.7390e-03,
        -1.0512e-01, -7.4923e-03,  1.1485e-02, -5.4508e-02, -1.2188e-01,
        -8.7221e-03, -6.7684e-02,  8.1192e-02,  7.8011e-02,  2.2895e-02,
        -1.3265e-02, -6.6598e-02, -1.6627e-01, -2.4117e-02,  5.8069e-02,
        -1.0991e-01,  5.9138e-02, -9.0690e-02, -1.4457e-01, -7.1286e-03,
         3.4836e-03, -3.9509e-02, -3.9642e-02, -5.5944e-02, -7.6413e-03,
         8.4440e-02,  6.0833e-02,  1.9335e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7408e+00,  1.2029e+02,  1.1358e+00,  6.6798e-01,  2.0424e+00,
         2.6335e-01,  4.6858e-01, -1.6372e+00, -4.3706e-01, -1.8609e+00,
         5.2053e-01,  1.9442e-02, -8.4121e-01, -8.1198e-01,  6.4905e-01,
        -1.1192e+00, -1.2063e+00, -9.3983e-01, -1.9459e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0804,  4.5242,  0.0106, -0.0411, -0.0506,  0.0520,  0.0235, -0.0241,
        -0.0134,  0.0171, -0.0403, -0.0227, -0.0463,  0.0047,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3496e-01,  7.1885e+01,  3.0860e-01, -9.2765e-01, -1.4810e-01,
         5.1152e-01, -1.4665e-01,  5.2377e-01, -3.9481e-01,  1.0084e-01,
        -3.5984e-01, -2.8414e-01,  1.6424e-01, -9.6237e-01,  3.7801e-02,
        -5.2230e-01,  4.2140e-01,  5.5163e-04, -3.7110e-02, -5.0274e-01,
         8.7835e-01,  2.8061e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8321e+00,  1.7229e+02,  2.3407e+00,  2.8359e+00,  7.7380e-01,
        -2.9436e-01,  5.3003e-02, -1.5700e+00,  6.9284e-01, -2.4037e+00,
        -1.5932e+00,  1.2381e+00,  1.1012e+00,  6.2668e-01,  6.3832e-01,
        -1.2281e-01,  7.5676e-01,  4.5950e-01,  4.8680e-03, -1.1910e-01,
        -2.2051e-02,  2.2837e-03,  8.4078e-01, -1.5805e+00, -9.2392e-01,
        -1.2031e-01,  6.4699e-01,  6.7438e-01,  1.1443e+00, -1.8165e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1371,  6.8254, -0.0703,  0.0389, -0.0121, -0.1052,  0.1532, -0.0772,
         0.0794, -0.0122, -0.0396, -0.0119, -0.0659,  0.1059, -0.0781, -0.1972,
         0.0435,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6850e-01,  2.0682e+01,  2.4082e-01,  3.1417e-01, -8.6827e-02,
        -3.6290e-02, -1.0241e-01,  3.9724e-01,  7.0690e-02, -1.7739e-01,
         1.5876e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.6301, -22.4465,  -0.3823,  -0.1525,  -0.1217,  -0.1410,   0.0731,
          0.1048,  -0.0906,  -0.1579,   0.2262,  -0.0646,  -0.0874,  -1.3318,
         -0.1467,   0.2862,   0.9770,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1890e+00, -4.8355e+01, -2.8343e-01, -1.5991e+00, -6.1185e-02,
        -3.7974e-01, -8.6730e-02, -8.0623e-02, -3.9884e-01, -1.9578e-01,
        -2.3087e-01, -3.1492e-01, -2.9277e-01, -4.1304e-01, -5.5980e-01,
        -1.8805e-01, -9.0485e-02,  3.1970e-01,  1.2739e-01, -1.8078e-01,
        -1.0792e-01,  2.0395e-02, -1.1335e-01, -3.2996e-01,  4.0918e-01,
         1.5083e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4760e+00,  2.6133e+01,  5.1182e-01, -3.9997e-01, -1.3999e-01,
         3.5870e-01, -4.0494e-02,  1.8761e-01, -2.1141e-01, -4.0706e-02,
         3.5573e-02, -4.5310e-01, -1.4561e-02, -9.3874e-02, -1.0184e-01,
        -4.1597e-02,  8.8057e-03,  8.2315e-02, -6.3973e-02, -9.8376e-02,
         5.6317e-02, -2.6234e-02, -5.9219e-02, -9.8126e-02,  1.7325e-01,
        -7.9930e-02, -2.7007e-02, -5.0091e-02,  3.4320e-02,  1.7468e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2607e-02,  9.5184e+00,  2.7443e-01, -1.7567e-01,  1.6072e-01,
        -8.6867e-02, -8.1658e-02, -1.1470e-01, -1.4450e-02, -5.3308e-03,
        -4.7502e-02, -1.3982e-01, -1.0023e-03, -6.0532e-02,  3.7797e-02,
         2.9043e-02,  1.6302e-02, -1.3648e-02,  4.3537e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 5.4634e+00, -1.0400e+02, -1.5046e+00, -2.0685e+00, -6.8960e-01,
        -3.1722e-01,  4.4879e-01, -9.1568e-01,  6.2192e-02, -8.3539e-01,
        -1.0040e+00,  6.6594e-02, -2.0762e-01, -1.3430e+00,  4.4767e-01,
         8.3298e-01, -1.8656e+00, -6.5968e-01, -8.3029e-01, -1.1164e+00,
        -6.3014e-01, -5.1453e-01, -1.2295e-01,  5.0382e-01,  4.5092e-01,
        -3.2112e+00,  1.6529e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1584, 30.9490, -0.1656, -0.2251,  0.2903, -0.4518, -0.2445,  0.1355,
         0.6377,  0.1977,  0.2043,  0.2996, -0.5165, -0.5548,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5236e-01,  3.5417e+01,  5.6476e-01, -6.6630e-01, -2.3660e-01,
        -3.0450e-01, -1.5611e-01, -1.1144e-01, -2.5626e-01,  1.4340e-01,
         1.9326e-01, -2.0518e-02, -8.1293e-02, -6.5553e-02, -2.7159e-02,
        -3.4212e-01, -5.4665e-02, -2.4786e-01, -5.4484e-02, -1.5376e-01,
        -6.8459e-02, -1.5044e-01,  4.5887e-02, -1.9061e-01, -3.2652e-01,
         6.3130e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5529e-01,  2.3399e+01, -6.1234e-01, -3.2866e-01, -1.4423e-01,
        -4.1776e-01,  2.3626e-01,  5.9182e-02,  9.1930e-04, -9.7746e-02,
         1.4892e-01, -1.2578e-01, -1.9862e-01, -3.8228e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6719e-02,  2.2042e+00, -9.3180e-03, -2.2203e-03, -1.3203e-02,
        -4.4503e-03, -4.6913e-03, -1.6683e-02,  8.7751e-03,  8.6825e-03,
         2.4661e-03, -1.6730e-02,  5.9542e-03,  1.9748e-03,  1.3994e-02,
         3.9049e-03, -4.0382e-03, -2.7071e-05, -8.4166e-03,  1.4917e-02,
        -3.8823e-03,  5.4630e-03,  3.0865e-03, -7.4060e-04,  1.3658e-02,
        -9.9983e-03,  1.5193e-03,  8.6010e-04, -1.3892e-02,  1.1395e-02,
        -3.0197e-03,  1.9687e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7808e+00,  1.7429e+02,  3.9601e+00,  8.9001e-01, -3.8914e+00,
         1.6470e+00, -6.1513e-01, -6.4464e-01,  4.1152e-02, -8.9075e-01,
        -1.1563e+00,  1.6482e+00, -1.0506e-01,  6.6967e-01, -1.1757e-01,
         5.6116e-01,  1.7290e-01, -6.8773e-01,  2.8612e-01,  2.0575e-01,
         2.8208e-01,  1.1567e+00,  4.9746e-02, -1.0037e+00, -2.3359e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2268, 22.2448,  1.6109, -0.1372,  0.0311,  0.9243,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9508e-02,  2.0069e+00,  7.9881e-02, -2.4564e-02, -1.6538e-03,
        -2.6300e-02,  1.1919e-02, -1.4409e-02,  4.2344e-03, -8.2187e-03,
        -1.3961e-02, -4.4480e-03,  1.4070e-04,  2.7768e-02, -1.1753e-02,
        -4.2333e-03, -1.0432e-02,  8.1549e-03, -2.7975e-03, -3.6668e-03,
        -1.2149e-02, -2.2161e-04, -1.3890e-02, -1.4906e-03, -4.6805e-03,
         2.2166e-02, -4.3045e-03, -3.3455e-03, -6.8411e-03, -1.3845e-02,
         2.0615e-03, -1.8578e-03,  3.6653e-03, -1.6413e-03, -3.1684e-03,
        -6.6233e-03,  5.9753e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6157e+00, -1.0510e+02, -3.2146e-01,  2.0436e-01, -6.2986e-01,
        -9.8666e-01, -4.3352e-01, -1.6596e-02,  3.2300e-01, -1.3398e-01,
         3.9882e-01,  1.1848e-01, -2.9688e-01,  3.8162e-01, -1.7547e-01,
         4.4115e-01,  1.0643e+00, -5.2473e-02, -1.6750e-01, -1.8208e-01,
         1.1999e+00,  4.4859e-01,  7.1188e-01,  6.4500e-01,  2.2482e-01,
         8.0008e-01,  6.4599e-01, -3.8778e-01,  2.6399e-02, -2.0251e-01,
         1.6167e-02, -2.4102e-02,  2.5084e-01,  4.1642e-01,  2.3162e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9050e-01,  1.4844e+01, -1.8616e+00, -1.5720e-01, -4.6189e-02,
        -5.3701e-04, -1.0259e-01, -7.2108e-03, -9.2230e-02,  2.5141e-02,
        -1.7365e-01, -1.8524e-01,  1.1741e-01, -1.0320e-01,  2.4885e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9292e+00, -1.4683e+02,  4.2699e+00, -2.2368e-01, -7.5201e-01,
        -1.0403e+00, -1.3813e+00, -1.1313e+00, -4.6706e-01, -5.9736e-01,
        -5.3108e-01, -1.3806e-02,  2.0222e-01,  2.4274e-01, -1.0175e-01,
        -1.2847e-01, -2.5337e-01,  5.4907e-01, -1.1232e+00,  7.3329e-02,
        -3.0343e-01, -1.3987e-01,  6.8468e-01, -4.6253e-01, -2.3838e-01,
        -3.2415e-01, -3.4601e-01,  1.4553e-01, -3.1888e-02,  3.2667e-01,
        -3.2269e-01, -1.1285e+00, -6.0091e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1715e+00, -7.9775e+01,  8.7097e-01,  2.9514e-01,  2.9948e-01,
         1.3688e+00,  9.8461e-02,  2.6956e-02,  9.8592e-02,  7.0932e-02,
        -1.3421e-01, -1.1137e-01, -2.1970e-01,  5.4682e-01,  4.2367e-01,
        -1.9336e-01,  2.6641e-01,  4.2079e-01, -4.3881e-02, -3.0827e-01,
         4.2251e-02,  1.4150e-01, -6.9164e-02, -8.2237e-02, -2.9125e-01,
         2.8059e-02,  6.0117e-01,  2.9182e-01, -4.4250e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.0529e+00,  1.6111e+01, -9.3925e-02, -1.1418e-01, -1.0639e-01,
        -5.2174e-02, -6.5256e-02,  5.8736e-02,  8.8771e-04, -2.6877e-01,
        -8.2009e-02,  5.0116e-02,  5.7763e-02, -1.0156e-01, -1.8388e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3572e-02,  1.2052e+01, -1.2157e-01, -2.1333e-01, -1.0106e-01,
         4.6754e-02, -8.0275e-02,  1.3347e-02,  9.0370e-02,  1.8100e-02,
        -6.0272e-02, -1.9096e-02,  7.0012e-03,  1.1891e-02, -2.1519e-02,
        -2.9147e-02,  2.1585e-02, -8.6479e-04,  2.9507e-02,  1.9989e-03,
        -2.1358e-02, -5.8780e-02, -4.7075e-02,  1.2776e-02, -1.6567e-02,
         7.5140e-03,  1.3385e-03,  6.1306e-02, -4.8930e-02, -1.3484e-05,
         2.5841e-02, -4.1200e-02,  3.8832e-03, -1.6288e-02, -1.2401e-02,
        -1.2342e-02, -7.0937e-03,  1.0676e-02, -1.1797e-02,  3.7007e-03,
        -1.7458e-02, -2.8434e-02, -2.5826e-02, -2.9494e-02, -2.0989e-03,
        -4.2083e-02, -3.8511e-02, -6.8793e-02,  1.3080e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9342e+00,  2.6264e+01,  6.9370e-01, -2.3028e-01,  9.4300e-03,
        -1.3534e-01, -3.1223e-01, -8.1562e-05,  8.7560e-02,  3.6637e-02,
        -1.1924e-01, -5.6269e-03,  3.5737e-02, -6.5323e-02, -4.7572e-02,
        -2.7411e-01,  6.9726e-02,  3.8101e-02, -8.6410e-03,  2.5031e-02,
        -8.2516e-02,  1.1777e-01, -1.4984e-01,  1.1402e-02, -3.1316e-02,
         2.6202e-02,  1.4670e-02, -8.7580e-02,  1.0561e-01, -1.4372e-01,
         8.6725e-02,  9.5388e-02,  3.1731e-02,  4.4365e-02,  2.6090e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8199e+00,  1.7111e+02, -3.8326e+00,  3.1275e-01, -1.0967e+00,
         1.0927e+00, -2.4614e-01,  3.7470e-01,  8.9371e-01, -1.0223e+00,
        -3.3015e-01, -7.9886e-01, -6.5991e-01,  3.4876e-01,  2.2294e-01,
         1.7811e+00,  1.9711e+00,  1.4558e-02, -2.6225e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3565e+00,  1.1130e+02, -8.8553e-01,  3.7823e+00,  1.1833e+00,
         1.0536e-01,  3.9420e-01, -4.3045e-01, -9.1114e-01,  3.3920e-02,
         9.0995e-02,  3.0098e-01,  3.8423e+00, -2.0944e+00, -8.0036e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0722e-02,  2.6792e+00,  2.0935e-02,  1.1898e-02,  8.2813e-03,
        -8.6531e-03, -2.2505e-02, -1.3124e-02, -1.5445e-02,  2.6515e-03,
        -2.3488e-02,  2.4980e-02, -1.6092e-02,  6.8840e-03, -1.5589e-02,
        -4.6604e-03, -1.9855e-03, -2.7364e-02, -2.7364e-02,  3.4849e-03,
        -1.6480e-02, -3.5087e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8302e-01,  5.2913e+00,  1.2731e-02, -5.2847e-02, -3.0627e-02,
         9.4471e-03,  1.9347e-03, -1.6703e-02, -1.1461e-02,  1.0830e-02,
        -8.1993e-02, -9.1506e-03,  3.1083e-02,  1.4703e-03, -4.2538e-02,
         3.3280e-03, -3.2634e-02,  1.1761e-02,  3.8156e-02,  9.9418e-03,
        -1.4821e-02,  1.0930e-02,  1.7869e-02,  1.7745e-02,  3.4409e-02,
         5.0136e-03, -1.4766e-02,  7.1879e-03,  1.8415e-02,  6.8403e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7907, 16.5242, -0.8595, -0.3534,  0.0934, -0.1369, -0.1996,  0.1425,
         0.2245, -0.2635,  0.0382,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9510e-02, -7.5335e+00,  1.2937e-01,  4.5439e-02, -4.6598e-02,
        -2.7619e-02, -4.0625e-02,  1.1491e-02, -2.4099e-03, -1.0187e-02,
         4.0539e-02, -7.1420e-02, -5.4159e-02, -8.0918e-02,  6.6857e-03,
        -3.3849e-02, -1.9117e-02, -3.6099e-03,  4.1802e-03,  3.7656e-03,
         6.4741e-02,  3.6653e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3147e-01,  7.3751e+00, -1.2864e-01,  2.7186e-02,  3.0247e-02,
        -9.7018e-02, -1.8244e-02,  1.7034e-02, -2.9671e-02, -2.0717e-03,
        -2.9681e-02, -6.9259e-03,  3.9272e-02,  5.0936e-04,  2.2059e-02,
        -6.3040e-03,  5.9893e-02,  4.3540e-02, -2.4430e-03, -9.1850e-03,
        -1.5329e-02, -5.1465e-02, -7.1435e-02, -7.6056e-03, -6.2519e-03,
         9.1288e-03,  3.4784e-02,  3.8820e-02, -3.5764e-02,  2.8699e-02,
         1.8235e-02,  4.3823e-02,  2.3120e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8339e-01,  1.9931e+01,  6.6513e-01,  5.1861e-01, -5.9982e-03,
         2.0366e-01,  5.6378e-02,  1.1937e-01,  2.4147e-02,  1.6648e-02,
         4.1500e-02,  2.0187e-02,  4.4519e-02,  1.4283e-02,  4.5121e-02,
         2.3175e-02,  5.6799e-02, -1.0209e-01, -1.5518e-01,  6.1962e-02,
         3.0447e-02, -3.9863e-01,  8.3379e-02,  1.0480e-01, -2.8590e-01,
         2.4982e-01,  1.4074e-01, -9.3907e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0894e-01, -7.7231e+00, -1.1845e-01,  8.2008e-03, -4.2144e-03,
        -2.6185e-02,  1.0234e-01, -6.6352e-02, -1.9994e-03, -2.0237e-02,
        -8.2032e-03, -3.3467e-02, -2.7360e-02, -3.6030e-02,  5.1647e-03,
        -1.0145e-02, -5.0393e-02,  1.4804e-02,  1.5927e-02, -1.0314e-04,
         7.1619e-02, -1.2459e-03, -3.0433e-02, -4.8616e-02,  1.9795e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 2.7319e-01,  1.0682e+01,  1.7398e-01, -9.9226e-02, -6.7106e-02,
         3.6918e-03, -7.2839e-02, -7.3306e-02,  4.6298e-02, -1.2777e-02,
         5.6136e-02,  1.8143e-02, -4.6901e-02, -4.6829e-02, -3.9727e-02,
        -1.6978e-01, -7.0738e-02,  4.8354e-02, -4.3944e-02,  2.6896e-02,
         1.2750e-02, -3.5946e-02,  2.8116e-02,  1.9702e-02,  2.8521e-02,
        -6.0236e-02,  4.1778e-02, -2.1169e-02, -5.6815e-02, -6.4862e-02,
        -5.2550e-02, -3.4203e-02, -1.2818e-01, -2.9499e-02, -2.0253e-02,
        -1.0969e-02, -2.0236e-02, -8.0340e-03,  3.7444e-02,  3.1187e-01,
        -1.2365e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6182e+00, -5.5910e+01, -9.2568e-01,  5.6808e-01,  2.3939e-01,
        -6.9574e-02,  2.2840e-01, -6.1533e-01,  1.6249e-01,  6.9196e-02,
        -3.6971e-02,  3.2510e-01, -1.5698e-01, -6.2754e-02,  1.8379e-01,
         4.5670e-01, -1.1369e-01, -9.8548e-03,  1.2277e-01,  5.5524e-01,
         2.1181e-01,  6.3460e-01, -2.6606e-02,  3.3766e-01, -6.7518e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0129e-01,  8.5077e+01,  3.8558e+00, -3.0241e+00,  3.5893e-01,
         6.8907e-01,  8.4318e-01,  1.0999e+00, -2.0543e-01, -3.8439e-01,
         4.4458e-01,  9.5582e-01, -4.0390e-01,  2.3257e-01, -4.0944e-01,
        -3.1406e-01,  8.7055e-02,  6.6875e-02, -2.3408e+00,  2.6271e-01,
        -6.3573e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.0535, 134.1774,   1.0121,   0.1773,  -0.2870,  -1.7300,  -0.3753,
         -2.1306,   0.4442,   0.9760,  -2.2411,   0.4536,  -0.2027,  -1.6584,
         -0.4451,  -1.0553,  -2.3962,  -1.3029,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9287e-02,  1.5677e+00,  1.2385e-02, -1.8724e-02, -1.2111e-02,
         8.5121e-03,  1.8926e-03, -1.5745e-02, -9.2058e-03, -6.2514e-03,
        -5.6655e-03, -1.7913e-03,  9.7516e-04,  3.3028e-03, -5.0905e-03,
         7.9321e-03, -6.4883e-03,  3.0342e-03, -4.4973e-03, -7.7427e-03,
        -4.0996e-03, -4.1379e-03, -3.0127e-03, -1.8186e-03, -3.6950e-05,
        -3.8513e-03, -2.8478e-03,  1.3455e-04, -7.2828e-04, -4.2833e-03,
        -1.6058e-03,  3.7811e-04,  3.3237e-03, -1.5895e-03,  1.0010e-02,
         1.3063e-02,  3.9191e-02,  1.0077e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.1408, -47.9736,   0.1690,  -0.7560,  -0.1221,  -0.3250,   0.4357,
          0.2821,  -0.5867,  -0.1990,   0.4984,   0.1281,  -0.1722,  -0.0870,
          0.6580,  -0.3263,  -0.7970,  -0.6150,   0.4473,   0.7583,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1172e-01, -1.9859e+01, -2.0230e-01, -5.2865e-01, -4.4632e-01,
        -8.5532e-02, -1.1953e-01, -3.8988e-01, -1.1021e-01, -2.0857e-02,
        -2.4820e-01,  6.8658e-03, -1.0376e-01, -1.1974e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5813e-01,  3.2931e+01,  1.4604e+00, -1.9954e-01, -2.8268e-02,
        -5.6064e-02, -3.0880e-01, -1.0957e-01, -5.5511e-02, -8.0667e-02,
        -1.5982e-01, -2.2058e-01,  1.1946e-01, -1.9046e-01,  9.1289e-02,
         7.0105e-02,  1.1349e-02,  1.4707e-02, -1.0166e-01,  1.0435e-03,
        -1.7086e-01,  1.0230e+00,  1.0097e-01,  4.2149e-02, -2.5409e-02,
        -1.3636e-01,  2.1873e-01, -6.8910e-02,  9.4816e-02,  4.7719e-01,
         4.8364e-02, -1.0774e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0176e+00,  1.1821e+02,  2.1952e+00,  3.3779e+00, -1.4258e+00,
         8.5089e-01, -3.9477e-01, -3.1483e+00,  9.5516e-01, -1.6492e+00,
        -1.7217e-01, -3.7435e-01,  2.7046e-01, -2.3171e-01,  5.2238e-01,
        -6.6226e-02, -3.0820e-01, -5.8281e-01, -1.0296e+00, -4.4770e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2148e-02,  6.3927e+00,  3.2473e-02,  2.3506e-02,  1.3001e-01,
         8.3292e-02,  1.6902e-03, -3.8855e-02, -1.9424e-02,  5.3082e-03,
        -2.3244e-02,  4.8930e-02, -5.2580e-03, -1.0948e-02,  3.6126e-03,
         8.6992e-03, -2.0195e-02, -1.0838e-02, -3.2334e-02,  3.1060e-03,
         2.6629e-02, -8.3810e-02, -6.8424e-02, -2.8279e-02, -3.8651e-02,
        -3.1049e-02,  3.7120e-03, -2.4305e-02, -8.1807e-02,  8.0196e-03,
         2.0525e-02,  3.4563e-02,  8.3987e-03,  4.2288e-02,  5.6261e-02,
        -2.6530e-03,  1.5395e-02, -2.6068e-02, -1.2982e-02,  1.8570e-02,
        -4.3993e-02,  4.0648e-02,  2.1384e-02,  1.2757e-02,  4.4398e-02,
         4.2763e-02, -6.1419e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8832e-01,  6.0264e+01, -3.4261e-01, -8.5832e-01,  1.7318e+00,
         4.4522e-02,  8.8932e-01, -3.5002e-01,  1.2150e+00, -1.7895e-02,
        -1.0191e+00, -1.1553e-01, -8.6988e-02,  7.1471e-01, -3.0595e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7768e-01,  1.5554e+01, -8.6712e-03, -2.4565e-02,  7.5089e-02,
        -1.4474e-01, -6.3121e-02,  1.2728e-03,  2.5653e-03, -3.9526e-02,
        -6.3567e-02, -1.1400e-02,  1.0288e-02,  1.5574e-01, -7.8858e-02,
        -3.4189e-03,  1.1384e-01, -1.0575e-02, -6.6520e-02, -8.5560e-02,
        -1.7704e-03, -4.7711e-02, -3.6471e-02,  2.8768e-02, -2.0354e-02,
        -5.8520e-03,  3.0416e-02,  6.3104e-01, -2.8315e-02,  6.1637e-02,
        -2.8615e-02, -5.6563e-02,  1.8990e-02,  1.2292e-03,  4.6338e-02,
        -3.5774e-02,  4.5786e-03,  4.2907e-02, -7.7934e-02, -1.4452e-02,
         2.2473e-02,  6.6792e-03, -5.6876e-02,  5.9150e-03, -2.5398e-02,
         7.9526e-03,  9.9063e-02,  2.7237e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-3.0018e+00,  1.4369e+02, -5.5779e+00, -1.6130e-01,  2.8574e+00,
         4.8892e-01, -2.5516e-01,  1.0734e+00,  3.0177e+00,  1.1596e+00,
         8.3633e-01, -2.0524e+00, -1.7149e-01,  1.7661e-01,  2.1056e-01,
        -5.6145e-01, -2.1394e+00, -1.8240e-01,  6.9695e-02,  7.5133e-03,
         4.2197e-01,  1.2419e+00,  2.5910e-01,  1.0778e+00,  5.5780e-01,
        -1.8421e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7346e+00,  1.7100e+02, -2.3988e+00,  3.3584e+00,  5.4187e-02,
         7.8008e-01, -1.4617e+00,  8.5752e-01,  1.5715e+00,  4.2822e-01,
         5.1023e+00, -1.2712e-01, -3.1918e-01,  6.6033e-01, -5.2423e-01,
         1.6015e-01,  9.8207e-02, -1.7005e+00,  3.8497e-01,  1.1415e+00,
         1.0973e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1822e-01,  1.2807e+01,  1.4246e-01,  4.4528e-02, -1.0254e-01,
        -4.9035e-02, -5.5789e-03,  7.9929e-04,  2.0674e-02,  1.9136e-02,
        -2.3278e-02,  4.4589e-02, -1.2728e-01, -2.9768e-02,  3.4954e-02,
        -2.1363e-02,  1.1536e-01, -8.6917e-02,  1.3626e-01, -9.9287e-03,
        -3.1989e-02, -8.1621e-04,  9.4052e-02,  2.7856e-02,  1.9020e-01,
         1.3302e-03,  1.0842e-02,  1.8557e-02,  9.1475e-03,  9.2483e-04,
         1.3191e-02, -6.6935e-03, -2.4368e-02,  4.2013e-02, -5.6325e-02,
         1.1846e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7686e+00, -3.3617e+01,  5.5406e-01, -1.2461e+00,  1.0499e-01,
        -1.9302e-01,  4.4716e-01,  1.3256e-01, -2.1908e-02, -1.0145e-01,
        -7.1186e-01, -2.6915e-01,  1.0539e-01, -1.1579e-01,  3.0759e-02,
        -1.5287e-01,  2.7654e-03,  2.6125e-01, -2.6186e-01,  8.2940e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8934e+00,  1.2780e+02, -2.8006e+00,  1.8380e+00,  1.0767e+00,
        -7.3217e-01, -3.8259e-01, -1.3357e+00,  4.9861e-01, -9.2822e-01,
        -2.3768e-01, -6.0235e-02, -6.7678e-01,  4.7613e-01,  7.0862e-01,
         8.8571e-01,  3.2781e-01, -3.5436e-01, -7.7198e-01, -9.1718e-01,
        -1.9198e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0068e-01, -7.7931e+00,  9.6042e-03, -8.9117e-03, -5.5582e-03,
         1.8374e-02,  6.0644e-02,  1.3429e-01,  7.4716e-02, -3.8238e-02,
         3.9535e-02,  2.0828e-02,  3.0672e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6990e-01,  4.4981e+01,  8.2163e-01, -2.1532e-01,  9.8073e-03,
        -2.5310e-01, -6.4100e-01, -4.9524e-01, -3.6489e-01, -2.8993e-01,
        -6.3803e-01, -6.1305e-01, -2.3092e-01, -1.3403e-01, -6.1362e-02,
         5.5125e-02, -3.7311e-01,  7.5406e-04, -1.3024e-01, -1.1650e-01,
        -8.1699e-02,  1.9444e-01, -5.7547e-02, -1.7300e-01, -8.9152e-02,
        -2.2072e-02, -6.5282e-01,  3.4383e-02,  9.0949e-02, -1.8137e-01,
         3.9073e-01,  1.2103e-01,  2.1445e-01, -2.7884e-01,  6.2615e-02,
         3.6708e-01, -4.3743e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6011e-01,  6.0303e+01,  3.0205e+00, -4.8272e-01,  1.0363e-01,
         3.0471e-01,  5.1602e-02,  9.9056e-01, -3.2944e-01,  2.5030e-01,
         2.6391e-01, -1.9858e-01, -1.1929e-01,  1.3584e-01,  5.1199e-01,
        -8.5651e-02,  2.1063e-02, -3.5967e-02, -2.5425e-01, -3.9344e-01,
        -2.3791e-01,  4.9632e-02,  5.3672e-01, -9.6272e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1179e+00, -8.1379e+01, -1.1227e+00, -1.1080e+00, -6.8792e-01,
        -1.5166e-01, -8.0476e-01, -1.3791e+00, -1.4770e+00,  3.7573e-01,
         1.2755e+00, -6.9157e-01, -1.2086e+00,  3.5510e-01, -1.1872e+00,
         7.4823e-01, -6.6660e-01, -8.5377e-01, -2.0711e-01,  1.2030e-01,
        -1.6786e-01,  1.9372e-01,  1.2640e-02,  6.1620e-01,  9.0065e-02,
         1.0012e-01, -2.5440e-01,  1.9054e-01, -1.1765e-01,  1.0022e-01,
         1.7059e+00,  7.7442e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4776e+00,  6.5411e+01,  1.0011e+00, -1.3439e+00,  3.5818e-01,
        -1.2562e+00,  2.0419e-01,  3.3954e-01,  1.4964e-01,  2.3496e-01,
        -1.1308e-01,  2.3208e-01,  2.0531e-01,  4.8710e-02,  7.2152e-02,
         2.1542e-01,  2.5864e-01, -9.8440e-02,  3.1006e-01,  3.5783e-01,
         3.2109e-01, -1.6547e-01,  2.9193e-01, -8.8709e-03, -1.7184e-01,
         7.3108e-01,  4.4330e-01, -3.0730e-01,  1.0394e+00, -7.8490e-01,
         2.7292e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5917e-01,  1.7027e+01, -5.6361e-01, -1.1200e-01,  1.9720e-01,
        -2.0540e-01, -1.8109e-01, -4.6026e-01,  7.4539e-02, -2.9723e-02,
         2.0687e-01,  4.8080e-02,  1.6576e-02, -4.8917e-01, -3.0695e-03,
        -1.2682e-01,  5.3201e-02,  2.3692e-02,  7.1771e-03,  9.0889e-02,
        -1.8236e-01, -2.2427e-01, -1.1295e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2655e-01,  8.5680e+01, -1.3266e-01, -9.9242e-01, -3.7017e-01,
         7.2019e-01, -2.8536e-01, -3.5711e-01,  2.8646e-01,  6.0731e-02,
         2.8288e-01,  2.6022e-01, -4.2926e-01, -1.1697e+00,  6.6787e-02,
         6.2558e-01,  5.9440e-02, -2.4789e-01,  1.0135e-01, -8.0703e-02,
        -8.8054e-01, -1.7877e-01,  2.6401e-01,  7.8816e-02, -5.7860e-01,
         8.7483e-02, -2.0089e-01,  7.9307e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.2170e+00,  7.7345e+01, -9.1218e-01, -5.1549e-02, -3.2451e-02,
         1.7563e-01,  1.8251e-01, -4.8593e-01, -4.5377e-01, -4.1530e-01,
         1.3444e-01, -3.3730e-01, -4.6005e-01,  3.6237e-01, -1.9094e-01,
         5.8727e-01, -1.7626e-01,  1.5901e-01,  5.3674e-01, -5.3906e-01,
        -8.3474e-02,  2.7001e-02,  3.5933e-03, -3.2975e-01,  3.4631e-01,
        -2.8820e-02,  6.8081e-01,  4.7034e-01,  3.2600e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2227e-01,  9.3132e+00, -3.4715e-02,  8.6804e-02,  3.4553e-01,
        -1.4647e-02, -9.0258e-03,  4.4255e-03, -4.9794e-02,  1.6363e-02,
         1.0734e-03, -6.1367e-02, -6.5039e-02, -1.9107e-02,  5.2522e-02,
        -3.4232e-03, -6.6047e-02,  2.4723e-02, -4.6376e-02, -4.5867e-02,
        -1.7344e-02, -1.2267e-01, -9.2869e-02,  6.0065e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0156e-01, -3.2510e+01,  1.1293e-01, -7.0328e-01, -3.3009e-01,
        -7.6903e-01,  1.3139e-01,  3.4005e-01, -4.8416e-01,  1.0569e-01,
        -5.2761e-02, -4.3279e-01, -1.5612e-01,  9.0604e-02,  1.8661e-01,
        -6.5177e-02,  9.8120e-02, -9.1475e-03, -1.2875e-01, -1.4639e-01,
        -5.0361e-01,  1.1602e-01,  3.2474e-02,  6.2876e-02, -1.1203e-01,
         1.0067e-01,  2.3543e-01,  2.4064e-02,  1.1833e-01,  1.4809e-01,
         1.8204e-01,  8.7002e-02,  2.6406e-01,  1.8841e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5800e+00, -9.4072e+01, -1.2311e+00, -1.1498e+00,  3.2160e-01,
         1.3901e-01, -1.3452e-01,  1.9018e-01, -1.9916e-01, -7.9905e-01,
         4.2546e-01,  7.2456e-01, -1.9131e-01,  3.0764e-01, -3.0927e-01,
        -2.2805e-01,  2.4720e-01, -2.5122e-02, -4.1120e-02, -1.0385e+00,
         7.7866e-01,  5.0053e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2639e+00,  1.1647e+02, -3.7360e+00,  2.5525e-01, -3.1803e-02,
        -4.3370e-01,  1.3838e-01,  1.1265e+00,  2.4972e-01,  6.6514e-01,
         2.9318e+00, -4.9261e-01, -2.0956e-01, -1.1047e-01, -3.3063e-02,
        -4.5152e-01, -5.1276e-01,  1.3402e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6363e+00, -1.7475e+02,  2.6886e+00,  1.7875e+00, -3.4307e-01,
         1.0464e+00, -6.5814e-01,  2.7263e-02, -4.9797e-01, -8.3387e-01,
         5.3146e-01, -2.3233e-02,  9.4009e-01,  3.2632e-01,  7.3334e-01,
         7.8758e-02, -1.2896e-01,  2.3351e+00,  2.3619e+00,  1.1674e+00,
         2.3501e-01,  3.3371e+00, -1.3298e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0298e-01, -1.4306e+02,  3.8372e+00,  8.0199e-01,  5.9946e+00,
        -8.3002e-01, -1.0476e+00,  1.1353e+00,  1.4532e+00,  7.6474e-01,
        -5.4754e-01,  1.9441e-01,  9.1394e-01, -8.7946e-01, -5.6873e-01,
         2.3105e-01,  7.8149e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  1.0153, 117.4570,   4.9457,   0.5044,  -0.6348,   0.9174,   2.0975,
          0.4758,  -0.3886,   1.8902,   1.3186,  -0.2427,  -0.2422,   0.5937,
          0.5164,  -0.4664,  -3.2291,   1.5103,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3358e+00, -7.9135e+01,  3.6692e+00,  2.2008e+00,  5.1603e-01,
         1.4711e-01,  1.4970e-01,  2.1107e-02,  6.8677e-02, -2.4382e+00,
         5.5803e-02,  6.4147e-01,  2.4670e-01, -5.5042e-01, -4.9235e-01,
        -7.2300e-01,  6.0061e-01,  1.5846e-01, -1.3648e-01, -3.3896e-01,
        -1.6788e-02,  6.5587e-02, -6.3047e-01,  3.0799e-02,  4.1506e-01,
         8.6682e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8722e-01,  6.7078e+00,  1.5121e-01, -7.4891e-02, -1.9905e-02,
        -1.3548e-01, -1.6694e-02,  2.7074e-02,  3.2788e-02, -7.2033e-02,
        -5.8281e-02,  8.0710e-04, -6.0788e-02, -2.4665e-02, -2.7164e-02,
         4.3717e-04,  9.9575e-04,  8.2384e-03, -1.6271e-02, -4.3566e-02,
         4.3525e-03, -5.5649e-03,  8.9041e-03,  1.3650e-02, -2.5621e-02,
         1.7255e-02, -3.7030e-02,  3.5907e-02,  2.8897e-02, -1.8529e-03,
        -4.9764e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0625e-01,  1.5665e+01, -1.4710e-01,  6.0500e-02,  3.7804e-02,
         1.9283e-02, -1.3377e-02, -2.5141e-02,  3.3550e-02, -2.3618e-01,
        -5.2047e-02, -8.5258e-02,  2.2360e-02, -4.2233e-02, -1.6257e-01,
        -1.2300e-01,  8.5669e-03, -1.4547e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0151,  3.6836, -0.0729,  0.0084, -0.0278, -0.0140, -0.0552,  0.0617,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.6981e+00,  3.4492e+01,  7.0076e-01, -6.6759e-02, -1.4029e-01,
         1.0854e-01, -1.6174e-01,  2.1458e-02, -1.2723e-01,  2.7583e-01,
         1.4318e-01, -1.0526e-01,  2.8994e-02, -7.6854e-02, -3.0615e-02,
         2.7961e-01,  5.4455e-02, -7.5359e-02,  1.5631e-01,  6.6663e-01,
         4.5454e-01, -5.3061e-01,  2.4579e-02,  5.2374e-02,  2.0556e-02,
        -5.2582e-02,  2.4956e-02,  1.4572e-01,  2.5597e-01,  9.9333e-02,
         1.3142e-02,  5.4671e-02,  1.1754e-01, -1.8384e-02,  2.9930e-02,
         1.5391e-01,  3.0633e-03,  5.0705e-01,  1.1552e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3265e-01,  8.8090e+01, -1.3528e-01, -2.2130e-01,  3.1744e-01,
         4.4199e-01, -9.6845e-01, -6.9185e-02, -4.2094e-01,  2.0378e-01,
        -9.0697e-02,  7.5519e-02,  2.5704e-01, -6.1438e-01,  4.7167e-01,
         3.0118e-01, -2.3411e-01, -8.9351e-01, -1.7479e-01, -2.6500e-01,
        -2.2686e-01, -2.0864e-01, -3.9676e-01, -3.7311e-02, -3.4968e-02,
        -2.9110e-01, -2.3633e-01, -2.0424e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6307e-01, -3.5486e+00, -1.5360e-02,  2.4509e-02, -1.5510e-02,
         2.5961e-02,  5.3699e-03, -1.1509e-02, -3.5602e-02, -1.8774e-02,
        -3.7054e-02,  2.3993e-03,  2.8032e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4815e+00,  1.3776e+02,  1.7571e+00,  4.5696e-01, -1.0153e+00,
        -8.3882e-01, -5.6570e-01, -1.9557e-02,  4.8681e-01, -8.6115e-01,
        -1.5355e+00, -1.7070e+00, -8.9915e-01, -1.6515e+00,  2.6232e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0323e-02, -9.4690e+00, -1.5810e-01, -1.8720e-02, -1.0445e-01,
         1.1247e-01, -8.8349e-02,  9.5485e-02,  2.7015e-02, -4.1733e-02,
        -1.5341e-02, -2.7590e-02, -3.2941e-02,  1.4492e-02,  2.6976e-02,
         2.7990e-02,  4.4855e-04,  4.6827e-02,  2.3115e-02, -4.4731e-03,
        -1.2301e-03,  2.4983e-03, -8.6475e-03,  6.0866e-02, -1.0612e-01,
        -7.9560e-02, -2.7161e-02, -1.0848e-02, -2.4891e-02, -4.9797e-03,
        -6.8716e-03,  3.0320e-03,  2.7139e-03, -4.3442e-03, -2.2894e-03,
        -1.5853e-03,  5.9542e-02,  9.1536e-03,  2.6609e-02,  1.3622e-02,
        -1.6641e-02,  7.5531e-02, -2.8750e-03, -1.1290e-04, -1.6386e-02,
        -1.7253e-02,  5.2542e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2524e-01, -4.1157e+01,  3.5963e-01, -1.9502e-01, -3.9228e-02,
         4.6957e-01,  7.5074e-02,  2.2863e-01, -3.5362e-01, -1.2145e-01,
         6.2959e-04, -8.5523e-02,  7.3046e-02, -4.9710e-02,  6.2801e-02,
         1.3739e-01, -1.4972e-01,  1.0647e-01,  1.7839e-02, -1.8958e-01,
         5.1375e-02,  5.8143e-02, -1.9987e-01, -3.4348e-02,  4.0467e-02,
         3.4907e-02, -3.0079e-01,  1.5190e-02,  2.5956e-02, -6.6090e-02,
         4.1044e-02, -6.1248e-03,  4.3260e-02, -7.3472e-02,  1.0601e-01,
         2.6501e-02, -1.8596e-01, -2.0654e-02, -5.3222e-02,  1.7773e-01,
         5.6955e-02,  7.0722e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9756e+00,  1.3729e+02, -1.4186e+00, -1.5879e+00,  1.7455e+00,
         1.2771e+00,  1.5139e+00,  4.3433e-01,  2.6478e-01,  3.3755e-01,
        -3.7973e-01,  1.9302e+00,  4.8089e-01,  1.0626e-01,  6.9746e-01,
         1.3423e+00,  1.0522e+00, -2.1961e-01,  5.7909e-01, -6.9763e-02,
        -1.5010e-01, -8.3474e-02,  5.3033e-02,  1.9802e-01, -7.7404e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1241e-02,  2.6243e+00,  3.9123e-02,  8.8277e-04, -8.6119e-03,
         9.2175e-03,  3.5944e-02, -5.2355e-03, -6.8152e-03, -8.4817e-03,
        -8.9777e-03, -9.9531e-03, -1.2199e-03, -1.7075e-02, -1.5890e-02,
         1.4413e-02, -1.5215e-02, -1.2713e-03, -1.1934e-03, -4.9180e-03,
        -2.5873e-03, -1.5546e-02,  2.5580e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2197e-02, -3.3458e+01, -1.0687e+00, -4.4530e-01, -7.9041e-01,
         1.5694e-02,  6.5580e-03,  1.3317e-02, -6.9797e-02, -2.0492e+00,
        -3.6049e-01, -1.5295e-01,  2.4967e-01, -2.5187e-01, -6.7411e-02,
        -5.1195e-02, -5.8314e-03, -1.9240e-01,  1.0260e-01,  1.2032e-01,
         7.8656e-02, -1.5495e-02,  2.4303e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2541, 55.4427, -0.9160, -0.7558, -0.5262,  1.1260,  0.6667,  0.2725,
         1.1455,  0.4884, -0.6734,  1.1965, -0.1349,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.1025, -25.2777,   0.2524,  -0.0509,  -0.0866,   0.0469,   0.0507,
          0.1472,  -0.0901,  -0.0496,   0.0688,  -0.0530,   0.2572,   0.3546,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5567e+00,  9.7340e+01,  9.6248e-01, -6.3875e-01, -1.7731e-01,
        -1.6822e-01,  6.4989e-01,  2.5084e-01,  2.6594e-01,  3.7964e-01,
         9.3467e-01,  3.0070e-01,  2.3801e-01,  2.0761e-02, -1.9305e-01,
         2.6834e-01,  2.4930e-01,  1.5141e+00,  3.2362e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 4.6157e-01,  9.7072e+00, -9.4640e-03, -8.3474e-02,  2.3306e-02,
         2.8124e-03,  6.8518e-02,  5.4480e-02, -7.7994e-02, -1.0167e-01,
        -2.3015e-02, -8.1603e-02,  1.9452e-02,  1.3756e-01, -5.4826e-02,
        -1.1492e-01, -2.2909e-02, -1.3798e-03,  5.2722e-03,  9.2149e-02,
        -2.4163e-02,  1.0276e-02, -3.1538e-02,  3.5810e-02, -2.2869e-01,
        -1.6658e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4537e-02,  1.9704e+01,  3.8507e-02, -9.1695e-02, -3.0516e-01,
        -3.8355e-01, -1.3724e-01, -3.7327e-02,  3.6625e-02, -6.4388e-01,
         2.5711e-01,  1.1896e-02, -3.1180e-01, -5.1477e-02, -1.4186e-01,
         1.9022e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -1.4807, 147.5570,   0.2474,  10.0390,  -0.2268,   1.8111,   1.7757,
         -0.8199,  -1.8452,   2.9684,   1.8764,  -0.2522,   0.9228,  -0.3997,
          1.6547,  -1.4087,   2.5778,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   6.7829, -146.5376,    4.6131,    1.5654,    1.1379,    0.9469,
           1.3674,   -4.4665,   -0.3176,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4605e-01,  2.6057e+01,  8.6296e-01, -1.1784e+00, -2.7817e-01,
         1.7318e-01,  1.9050e-02, -2.9300e-04,  8.8556e-02, -1.1516e-01,
         1.3792e-01,  2.5786e-01, -4.7658e-02, -1.7251e-01,  1.9916e-01,
         7.6634e-02,  3.8957e-02,  1.1197e-01,  1.0366e-01,  1.2982e-01,
         4.1551e-03,  2.1941e-01, -3.3933e-03, -8.7460e-03,  1.8593e-01,
         7.5163e-02, -9.7746e-02,  1.1972e-01,  1.0259e-01,  2.8289e-02,
         1.0619e-01,  6.1015e-02,  1.7160e-01, -4.0820e-02,  1.6954e-02,
        -5.3466e-02, -1.9505e-04,  2.5449e-01, -1.8162e-03, -2.7326e-02,
        -5.5086e-02,  7.5541e-02, -5.3557e-02,  1.6363e-02,  1.0126e-01,
        -4.6641e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8225e-01,  8.3371e+00, -1.0895e-01, -1.8877e-01, -5.0818e-02,
        -4.6998e-02,  4.8174e-02, -7.1387e-03, -7.7420e-02,  5.1459e-02,
        -4.3884e-02, -2.7591e-02,  1.4352e-01, -6.1905e-02, -9.3808e-03,
        -6.9596e-03,  4.1085e-02, -1.9982e-03, -8.3262e-03,  4.0279e-02,
        -3.4000e-02, -4.8312e-02,  1.4284e-02, -4.0779e-02,  1.4464e-02,
        -1.1635e-02,  5.5480e-02,  1.0331e-02, -5.5767e-02,  1.0130e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -5.6954, -118.9100,    1.6550,    2.0780,    0.3680,    0.3598,
           0.6933,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9702e-02, -5.7039e+00,  2.6970e-02,  3.6302e-02, -1.1872e-02,
         5.6909e-03, -1.5533e-02, -3.9345e-02,  4.3096e-02, -2.4570e-02,
         1.7583e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5844e+00,  3.7674e+01,  1.0805e+00, -3.5848e-01,  4.5646e-02,
         4.2195e-01, -2.4003e-01, -1.4543e-01,  2.9356e-02,  2.0285e-01,
        -9.3875e-02, -1.3447e-01, -7.7897e-02, -6.6155e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2748e+00, -4.3802e+01,  2.6076e-01,  6.1738e-02,  6.9064e-01,
        -6.6905e-02, -1.3338e-01,  7.4999e-02,  4.3510e-02,  1.9929e-01,
        -1.0446e-01, -1.4263e-01,  4.6600e-01,  2.7473e-02, -1.7167e-02,
        -1.4620e-01, -1.2162e-01,  3.9982e-02, -2.5991e-01,  1.0733e-01,
        -7.7605e-01, -3.5504e-02,  1.2651e-01,  1.6345e-01,  8.9389e-02,
        -1.5970e-01, -7.7183e-02, -8.7012e-02, -8.4108e-02, -1.6704e-01,
        -7.0858e-02, -3.1532e-02,  2.0119e-01,  2.3498e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1716e+00,  4.3444e+01,  1.4728e+00, -9.5303e-01, -3.6375e-01,
         1.4981e-01, -1.7620e-01, -5.4165e-01, -3.0035e-02, -5.5841e-01,
        -1.5182e-01, -1.8640e-01, -1.2294e-01, -1.7182e-01, -1.0054e+00,
        -3.9602e-03,  1.2415e-01, -2.9878e-01, -6.3996e-02, -2.6694e-01,
         3.4639e-01,  8.6371e-02, -2.8010e-01,  1.8036e-01, -1.8108e-01,
         1.3961e-01, -5.9985e-02, -2.6196e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9652e+00,  1.3460e+02,  1.9115e+00, -1.7889e+00, -2.2379e-01,
        -8.4386e-02, -1.0825e+00,  3.0949e-02, -6.1912e-01, -1.7266e-01,
         8.6072e-02, -1.0603e+00, -1.9984e-01, -1.0111e-01, -4.8635e-01,
        -2.5055e-02, -6.6271e-01,  1.6341e+00,  5.4071e-01,  5.8495e-02,
        -1.1225e-01,  2.3748e-01, -3.6251e-01, -1.0133e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 5.5667e-01,  3.8322e+01,  4.8945e-02, -5.3798e-03,  7.6320e-02,
         2.4323e-01, -1.9941e-01, -1.1623e-01, -1.3136e-01, -6.7798e-02,
        -8.3729e-01,  6.7630e-02,  3.9866e-02, -2.3410e-03,  7.6230e-02,
         4.7789e-02, -1.2270e-01, -2.2768e-01, -1.6236e-01,  1.4419e-01,
         1.1127e-01, -1.4207e-01, -2.3137e-01,  1.9949e-04,  4.4210e-03,
         8.3809e-02,  7.6445e-03,  1.0580e-02,  1.5004e-01,  2.7779e-01,
        -3.1970e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8896e+00, -1.6400e+02, -9.0328e-01, -2.2429e+00, -5.1409e-02,
        -1.3492e+00, -1.5623e+00,  4.7170e-02,  3.4378e-01, -5.0730e-01,
         6.9027e-01,  2.0859e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0239, 46.6419,  0.5061, -0.3823,  0.4333,  0.0603, -0.1517, -0.2236,
         0.2082,  0.3172,  0.0928,  0.1358,  0.8966, -0.2605,  0.4728,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8939e+00, -1.4206e+02,  1.0461e-02, -2.0218e-01, -3.9527e-01,
        -5.4469e-01, -6.7923e-01,  8.0938e-01,  8.4761e-01, -1.3176e-01,
        -2.3594e-01, -1.8250e-01, -4.8007e-01, -2.3186e-01,  1.8454e-01,
         7.4876e-02,  1.9776e-01,  1.9502e-01,  1.1110e+00,  5.1220e-01,
        -1.9652e-01, -3.5729e-02,  1.3775e-01,  1.3882e-01, -2.5231e-01,
        -6.8100e-01,  1.7883e-01,  9.6385e-02,  2.2553e-01, -2.7198e-01,
        -3.2553e-01,  3.4382e-01,  2.2878e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  5.8018, 185.4550,   3.1404,  -3.4090,   0.4204,   0.7241,  -1.2130,
          1.0820,  -1.5878,  -0.3784,  -0.8711,  -1.0964,  -0.5901,   3.1980,
         -1.6874,  -3.3778,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2419, 13.8593,  0.0267, -0.0392,  0.1003, -0.1033, -0.1840, -0.3509,
         0.1970,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4184, 99.0130, -6.3754,  1.2592, -5.8143,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9337e+00,  1.2292e+02, -2.4202e+00,  9.3862e-01, -3.7515e-01,
        -1.2059e+00, -5.9276e-01,  1.9781e+00, -1.7469e-04, -7.3427e-01,
        -1.9581e-01, -5.7540e-01, -7.9985e-01, -6.6626e-02, -7.2051e-01,
        -4.6496e-01,  3.9976e-01, -3.3627e-01,  4.8355e-01,  8.6687e-01,
        -6.4126e-02, -1.5338e-01,  2.1731e-01,  2.7047e-01,  9.5820e-02,
        -1.2704e+00,  7.1722e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -3.2227, -58.8364,  -1.4630,  -1.1035,  -0.2022,   2.8291,   0.8547,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1755e-01,  1.4412e+01,  1.3744e-01, -8.3329e-02,  1.4624e-01,
         8.2153e-02, -1.1801e-01, -3.5156e-02, -1.4319e-02,  4.2427e-02,
         1.5893e-02, -1.3744e-01,  9.2671e-03, -3.5227e-02,  1.3631e-02,
         4.9542e-02,  1.2401e-01, -1.1098e-01,  1.1718e-01,  2.9933e-01,
        -5.1791e-02,  2.8193e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7912e-01, -1.2142e+01, -5.0755e-02, -1.3001e-01, -2.5457e-02,
        -3.1566e-02, -6.6605e-03, -2.8846e-02,  1.6123e-02,  3.7674e-02,
         1.0312e-02, -2.5625e-02,  5.2521e-02, -6.1575e-02,  7.1418e-02,
         1.1309e-02,  2.7350e-02,  6.6065e-02,  4.1549e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8430e+00, -1.6658e+02, -3.3444e-01, -1.4757e-01,  7.6782e-01,
         3.9149e-01, -1.3617e-01, -1.9577e-01, -4.8913e-01, -3.5527e-01,
         1.0466e-01, -3.7666e-01,  2.2664e-02, -5.2486e-01,  8.5117e-02,
        -1.5612e-01,  4.3679e-01, -5.6256e-01, -2.7504e-01, -6.1902e-03,
        -1.0854e-01, -1.6472e+00, -3.7152e-01, -8.0877e-01,  5.5248e-01,
        -2.4674e-02,  2.0725e-01,  2.2232e-01, -2.3992e+00, -3.1411e+00,
        -2.8651e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.6137e+00,  8.1065e+01,  4.9777e+00, -6.8402e-02, -5.7833e-01,
        -3.6443e-01,  3.2491e-01, -1.8676e-03, -8.1884e-01,  1.0088e-01,
         5.0471e-01,  1.8161e-01, -3.6252e-01,  2.5783e-01, -7.7008e-02,
         1.9221e-01, -6.9633e-01,  6.7080e-01, -2.2307e-01, -1.9971e-01,
        -2.9504e-01,  6.1536e-02,  3.2818e-01,  7.0876e-02, -3.0158e-01,
        -2.1709e-01, -3.2646e-01, -1.4990e+00, -7.9374e-02, -4.0750e-01,
         6.3552e-01,  5.8894e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2627e+00, -1.2188e+02,  1.3633e+00, -7.8954e-02, -1.2473e-01,
        -3.0064e-01, -1.3687e+00, -6.3911e-01, -1.1266e+00,  4.1872e-01,
         1.8228e+00,  3.0323e-01, -3.7607e-01,  3.8143e-01,  4.4226e-02,
         4.3288e-01, -3.7137e-01, -9.7224e-01,  1.0430e+00, -1.4617e-01,
        -7.6984e-01, -7.9404e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5797e-01,  1.5665e+01,  3.7655e-03, -1.5876e-01, -1.7746e-02,
        -5.1960e-02, -1.7358e-02, -2.2823e-01,  1.3260e-03, -1.5168e-02,
         6.4529e-02, -3.0518e-02, -6.5717e-02, -8.9601e-03, -2.1505e-01,
         1.9600e-02, -7.0793e-03,  2.2110e-01, -3.6510e-02, -3.2731e-02,
        -9.7020e-02, -8.7082e-02,  1.8572e-02, -6.9725e-02, -5.1565e-02,
         8.0009e-02, -6.7424e-02, -2.5688e-01, -8.3173e-03,  3.8494e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5410e+00,  1.0208e+02, -1.6208e+00,  4.5370e-01, -5.4296e-01,
        -5.9585e-01, -4.8422e-01, -6.2089e-01, -3.5999e-01, -6.7959e-01,
        -3.0533e-01,  7.2974e-01, -3.0426e-01,  8.2957e-02, -5.6384e-01,
        -8.2247e-02, -3.3446e+00, -2.1581e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9329e+00, -1.3528e+02, -3.8380e+00,  8.8818e-01,  2.0058e+00,
         2.2993e+00, -9.3203e-02, -3.4236e-01, -3.9408e-01, -3.6574e-01,
        -5.3535e-01, -2.0019e-01, -3.9170e-01, -9.3253e-01, -9.1381e-02,
        -2.2942e-01,  9.8129e-02,  5.2280e-01,  6.8599e-01, -6.1531e-03,
         1.1626e-01,  1.2254e-01, -6.8744e-03,  8.7508e-02, -3.2772e-01,
        -7.9206e-02, -2.0874e-01, -2.3516e-01, -3.0537e-01, -2.8363e-02,
        -8.6374e-02,  1.0892e-01, -7.5984e-02, -1.2366e+00, -3.3395e-01,
         1.1443e+00, -1.1682e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2882e-01, -1.6458e+02,  3.8470e+00,  4.2042e-01,  4.1641e+00,
         5.7900e-01,  6.7996e-01,  1.6311e+00,  5.3023e-01,  6.1466e-01,
        -7.9182e-01,  2.5935e-02,  4.2214e-01, -6.8333e-01, -1.4527e+00,
        -8.3984e-02,  1.3712e+00, -1.0734e+00,  2.4653e-01, -5.0969e-02,
         1.3085e+00,  6.0977e-01, -1.2934e-01,  1.2875e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6463e-01,  1.3682e+02, -1.6773e-01,  4.6699e-01,  1.6947e+00,
         9.8639e-01,  5.3129e-01,  2.5683e-01,  4.2038e-01,  5.9428e-01,
         2.8141e-01, -1.0705e+00, -5.9891e-01, -1.4090e-01, -5.2658e-02,
         2.9470e-01,  2.2908e-01, -1.6985e-01, -4.5683e-01, -2.4543e-01,
         1.4626e-01, -6.8670e-01, -8.1740e-02,  3.2227e-01, -4.2114e-01,
         2.7664e-01,  2.2623e-01, -1.6361e-01,  3.5629e-01,  4.9333e-01,
        -9.9415e-01,  2.1763e-01, -1.3505e+00,  3.8164e-01,  9.5629e-01,
         6.6460e-01, -1.1506e-01, -1.1204e-01,  1.4128e-01,  5.0778e-01,
        -2.9800e-01,  9.7397e-02, -2.8611e-01,  6.3436e-01,  2.2447e-01,
         3.8345e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6206e+00,  9.8936e+01, -3.8738e+00, -5.5884e-01, -1.1182e+00,
        -6.9909e-01, -9.0318e-01, -3.8837e-01,  1.3527e-01,  3.5869e-01,
         1.8135e-02,  1.2303e-02,  5.5413e-01, -1.8261e-01,  4.0601e-01,
        -2.9993e-01,  6.9110e-01,  6.7374e-01, -2.2916e-01,  6.2763e-02,
         1.4647e+00,  2.5768e-01,  1.3068e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1930e+00,  5.2540e+01, -8.7542e-01,  2.3533e-01, -3.3132e-01,
         5.0846e-01,  1.3237e-02, -9.7608e-02,  3.5150e-01,  1.4225e+00,
         1.2305e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9065e+00,  7.8957e+01,  1.4154e+00, -6.9652e-01, -5.7662e-01,
        -8.7805e-01, -5.5495e-02, -4.1500e-02,  5.0448e-01, -1.4583e-01,
         6.4393e-01,  2.6502e-01, -6.6439e-02,  4.3892e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3377e+00,  1.2028e+02, -2.0945e+00, -2.5833e+00, -5.7586e-01,
         8.9060e-02, -7.9294e-03, -3.9691e-01,  5.8250e-02,  4.6511e-01,
        -1.5739e+00, -7.9041e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5163e+00,  9.4493e+01,  9.9406e-01,  2.0563e+00,  8.9914e-02,
         1.1545e-01, -1.1831e+00, -5.6922e-03, -1.6839e-01,  3.8532e-02,
        -2.1822e-01, -2.0337e-02, -1.2855e-01,  5.1395e-02,  2.6925e-01,
         6.7622e-03,  1.4298e-01,  5.6683e-02,  3.4090e-01, -1.1425e-01,
        -1.4190e-01,  1.0400e-01,  6.7517e-01, -2.8783e-01,  8.3752e-02,
        -1.7554e+00,  1.3414e+00, -3.7790e-02, -3.2400e-01,  6.1264e-01,
        -7.7552e-02,  5.9842e-01, -3.9338e-02,  8.0737e-02,  3.9414e-01,
        -4.6284e-01,  2.6341e-01,  4.0769e-02, -1.8897e-01, -1.3047e-02,
         2.1653e-01,  2.0165e-01, -1.4673e-01,  1.6322e-02,  2.3978e-01,
        -2.9383e-02,  1.1213e-01, -5.5302e-02, -8.6619e-02, -2.4181e-01,
        -2.4292e-01, -4.5516e-02, -1.7073e-03, -1.6594e-01,  1.4290e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.8238e+00, -5.3966e+01,  1.7088e-01, -2.6606e-01,  9.1104e-02,
        -2.4391e-01, -5.2859e-02,  7.2629e-02,  3.6951e-01, -1.4501e-01,
         4.7467e-01, -9.5059e-02,  1.1787e-01,  3.0473e-01, -9.6734e-03,
        -1.6873e-01, -2.3534e-01, -6.9232e-01, -9.7030e-02, -3.8760e-01,
         1.7213e-01, -1.3356e-01, -3.8821e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9359e+00, -1.7395e+02, -2.6615e+00, -7.2627e-01, -1.5373e-01,
        -1.7024e-01, -2.3794e+00,  6.3497e-01, -1.9760e+00,  3.7181e-01,
         6.5966e-02,  6.7922e-01,  6.9277e-01, -6.6861e-01, -1.0858e-01,
         6.7468e-01,  1.9266e+00, -7.4333e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9875e+00,  3.6277e+01,  4.8760e-01, -2.9694e-01,  9.7660e-01,
         5.8207e-01,  3.7456e-01, -6.6424e-02, -1.1364e-01, -2.3647e-01,
         4.8495e-01, -7.6964e-05,  8.6512e-02, -3.3451e-01, -4.0647e-01,
         4.0711e-01, -2.3441e-02, -5.6233e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1870e-01,  2.1562e+01,  2.5486e-01, -1.4041e-01,  2.3515e-01,
        -1.0843e-01,  3.3790e-01,  1.9348e-01, -1.3808e-02,  1.1911e-01,
         1.2143e-01,  2.6698e-01,  1.3572e-01, -1.5300e-01, -5.7866e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2893e-03,  3.3492e+01,  5.1391e-02,  5.7909e-01, -1.2986e-01,
        -1.0903e-01, -7.0760e-02,  2.0236e-01, -1.4669e-01,  2.2404e-01,
         3.4998e-01, -1.2257e-02,  9.2263e-02, -1.3918e-03, -5.0414e-02,
         2.6301e-01,  4.0930e-02,  4.0750e-02,  2.7841e-02, -2.4329e-01,
         1.2631e-01, -1.8383e-01,  6.1526e-02,  3.1441e-02,  5.2619e-01,
         1.6073e-01,  4.3631e-02,  3.5092e-02, -4.6528e-02,  4.5457e-01,
         3.8052e-02, -2.6232e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5237e+00, -5.0601e+01,  4.5077e-01, -7.6733e-02,  1.8343e-02,
        -8.7956e-02,  5.7283e-02, -6.5980e-02,  4.2857e-01,  3.9078e-02,
         6.9820e-02,  3.5874e-01,  2.2076e-01,  1.0962e-01,  1.6292e-01,
         3.7725e-01,  9.4226e-02,  2.1510e-01,  3.0515e-01,  6.6659e-02,
         4.1191e-01,  1.0213e+00,  3.1373e-01,  4.3494e-01,  1.6207e-01,
        -4.3068e-03, -5.3429e-02, -5.3764e-01,  1.6171e-01, -3.4222e-02,
        -6.3503e-02,  1.9775e-01, -2.9968e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8112e-01, -7.7570e+01, -1.9670e+00,  3.0880e-01, -3.8765e-01,
         1.4832e-01,  8.1319e-02,  2.3722e-01,  2.9553e-01, -1.9090e-01,
         1.5635e-02,  1.4990e-02,  3.1748e-01,  1.5067e-01, -3.5634e-01,
        -1.2727e-01, -2.0395e-01,  1.1722e-01, -6.9318e-02,  1.8672e-02,
         6.2199e-02,  7.5361e-02, -1.8499e-02,  1.4220e-01, -2.3310e-01,
        -2.0233e-01, -1.6709e-01,  2.3481e-02, -1.4284e-01,  5.3574e-01,
        -2.6305e-01,  1.1213e-01, -2.7885e-01,  1.2715e-01, -2.2487e-01,
        -3.5853e-01,  1.1225e-01, -1.7207e-01, -1.6895e-01, -6.4268e-02,
         2.5016e-02,  7.4383e-03, -1.0767e-01, -9.4195e-03, -5.9101e-02,
        -1.6296e-01, -1.9924e-01, -7.4097e-02, -4.5083e-02, -2.4348e-02,
        -1.7265e-01,  1.0197e-02, -3.7522e-02, -1.3204e-01, -1.2358e-01,
        -1.0961e-01,  6.5329e-02, -5.9437e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7416e+00,  3.2195e+01, -2.1068e-01,  9.3957e-02, -2.4993e-01,
        -2.4381e-02,  3.3433e-02, -1.1315e-01,  2.5607e-02,  6.3159e-02,
         1.0573e-01,  1.8858e-01, -3.0960e-01, -3.1360e-01,  1.6116e-02,
        -7.2806e-02, -4.0160e-02, -6.8037e-02, -1.4872e-01,  7.0044e-02,
        -1.7000e-01, -1.3970e-02,  3.0109e-02,  2.2242e-02, -8.1878e-02,
        -1.0218e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7580e-02,  2.8314e+01, -5.5427e-01, -4.5032e-01, -5.1391e-01,
         1.0508e+00, -1.6787e-01,  8.3980e-02, -1.5934e-01,  4.6511e-01,
         2.8974e-01, -1.6869e-03,  1.6496e-01,  1.1327e-01, -4.4319e-03,
        -7.8448e-02,  1.5998e-01, -5.8825e-02,  4.7695e-01, -9.8939e-02,
         1.4159e-01, -2.0549e-01, -2.3592e-01, -3.2408e-01, -3.8967e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3078e-01, -8.0469e+01, -2.2464e+00,  5.4855e-01,  9.7322e-01,
        -1.0425e-01, -6.7296e-02,  1.1869e-01, -4.0035e-01, -2.4311e-01,
         1.2337e-01, -1.0104e-01,  3.7913e-01, -8.7011e-01,  3.3821e-01,
         8.0700e-02, -4.0211e-02, -3.4021e-01,  5.0915e-02,  4.2356e-01,
         1.5284e-01,  1.1471e-01,  7.6495e-02,  2.2218e-01, -6.7891e-02,
         8.0105e-02, -3.1880e-01, -1.7458e-01,  2.0339e-01,  1.0313e-01,
        -2.9046e-01,  3.7246e-01,  3.3376e-01, -5.2554e-02, -1.5520e-02,
         9.0943e-02, -3.0093e-01,  5.7534e-02,  1.3286e-01, -3.5015e-01,
        -5.9308e-02,  3.7013e-01, -2.0782e-01, -4.5473e-01, -8.5202e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7176e-01,  3.6116e+00, -2.7702e-02,  5.8982e-02,  1.5261e-02,
        -4.4424e-02, -3.2154e-02, -6.4662e-03, -1.7744e-02, -4.4570e-02,
        -5.1642e-02, -1.6272e-02,  5.6474e-03, -1.1604e-02,  1.2171e-02,
         1.0261e-01, -2.8092e-03, -1.2091e-02,  1.1200e-02,  8.8753e-03,
        -1.8382e-02,  1.8176e-02, -1.4395e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0071,  0.4496,  0.0139, -0.0105, -0.0019,  0.0082, -0.0050,  0.0106,
         0.0029, -0.0054, -0.0103, -0.0019,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-4.8077e-01, -8.5828e+01,  2.0499e+00,  2.2171e+00, -2.3745e-02,
         2.8266e-01,  2.1741e+00,  6.8161e-01, -2.2039e-01,  8.2010e-01,
         4.7656e-01,  6.8776e-01,  4.4071e-01, -2.8904e-01, -1.8198e-01,
        -3.7942e-01, -2.7895e-01, -1.8270e-01,  3.9798e-02,  6.2711e-01,
         4.6329e-01,  1.5833e+00, -2.3513e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3551e-01,  2.0199e+01,  6.2543e-02, -9.7303e-02, -5.9497e-02,
        -1.9181e-01, -5.4666e-03,  1.0680e-01,  9.3724e-02,  1.9968e-01,
         5.8973e-02, -7.1461e-02, -8.3317e-02,  1.8781e-02,  6.2462e-02,
        -1.4786e-01, -5.3044e-02,  1.3756e-02, -6.5009e-02,  1.0720e-01,
        -4.2602e-03, -2.9319e-02,  2.0535e-02, -7.8550e-02, -2.1912e-02,
        -5.4864e-03, -1.4217e-01, -1.8030e-02, -1.3796e-01, -2.1513e-03,
         9.5879e-03,  2.2881e-01,  3.8753e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5340e+00,  1.9612e+02,  5.8309e+00,  8.2212e-01,  5.4257e-01,
         9.7691e-01, -7.3874e-01,  1.8909e+00,  9.3723e-03, -1.2491e-01,
        -1.1631e+00, -1.4354e-02,  1.0186e+00, -6.0115e-01,  1.8272e-01,
        -1.4216e-01, -8.2784e-01, -1.4959e+00, -1.8733e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1906e-01,  1.9521e+01, -2.7228e-01,  1.1747e-01, -5.7632e-02,
         2.5032e-01,  1.0391e-01, -3.2302e-02, -4.6044e-02, -1.4010e-02,
        -9.8397e-02,  5.5620e-02,  1.0722e-01,  1.7121e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3546e+00,  1.8690e+02,  1.0356e+01, -7.8518e-01, -2.4982e+00,
         1.9037e+00,  9.6885e-01,  8.2106e-01, -2.7437e-01,  7.6076e-02,
         1.2994e+00, -6.0532e-01,  7.6574e-01, -1.4177e-01, -1.0782e+00,
        -2.4399e-02, -1.3041e+00,  3.6488e-01,  2.8130e-01, -5.5176e-01,
        -1.3851e+00,  9.4664e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1720e-01,  1.4531e+02,  1.2855e+00,  1.9842e-01, -3.5162e-01,
        -5.2816e-02, -4.3647e-01, -4.7724e+00, -2.2201e-01,  5.4025e-01,
        -2.5310e-01, -6.1500e-01, -1.5303e+00,  2.6470e-01,  3.1768e-01,
         3.5901e-02,  2.1706e-01,  1.2654e-01, -3.3459e-01,  5.9042e-01,
        -1.8941e-01,  3.6397e-01,  2.5560e-01, -3.4012e-01, -1.0428e-01,
         2.2085e-01,  5.7989e-01,  3.5196e-01, -3.3639e-01,  4.3724e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0108,  1.0730, -0.0179, -0.0125, -0.0059,  0.0126,  0.0151, -0.0153,
        -0.0037,  0.0066,  0.0011, -0.0059,  0.0031,  0.0057, -0.0041, -0.0042,
        -0.0209,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([  -0.2935, -119.7139,   -4.5688,    1.7339,    1.3636,   -1.2794,
          -0.2760,   -4.2531,    1.5990,   -1.3892,    4.1541,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8142e-02, -7.6627e+01, -1.1238e+00,  9.1357e-01,  1.5030e+00,
        -2.8651e+00,  1.8950e+00,  5.9657e-01, -7.7213e-02,  2.7860e-01,
        -6.0949e-01,  1.8521e-01,  5.3721e-01, -1.3954e+00, -4.4764e-01,
         4.0705e-01, -9.5399e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8152e+00,  1.3230e+02, -9.0089e-01,  1.7434e+00, -4.4272e-01,
        -3.6420e-02,  3.2766e-01, -7.0439e-01,  6.8595e-01, -1.0260e+00,
        -2.7911e+00,  6.0512e-01,  6.3372e-01,  1.4527e+00,  2.0853e+00,
         9.3538e-01,  1.6313e-02,  1.9308e-01, -1.9580e+00, -1.2613e+00,
        -5.1631e-01, -8.1630e-02,  1.5096e+00,  1.7887e-01, -6.4941e-01,
         4.6205e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5232e-01,  8.8645e+00,  7.7661e-02, -1.3185e-02,  5.3411e-03,
         2.1031e-01,  2.8758e-02, -3.6364e-03,  1.1788e-01, -4.7935e-03,
        -8.1878e-02, -5.7046e-02, -2.8550e-02, -1.6410e-02,  1.1126e-02,
        -2.2804e-02,  6.4818e-03, -1.0656e-02, -9.4682e-03, -3.7921e-02,
        -8.4255e-03, -7.2235e-03, -3.6406e-02, -2.0717e-03,  2.2656e-02,
        -7.4220e-03, -6.7726e-03, -1.1846e-04, -3.0795e-02,  2.5268e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4177e-01,  2.3830e+01,  7.2032e-02, -2.5476e-02, -2.0644e-01,
        -3.1242e-01, -1.2808e-01, -1.4421e-01, -3.3748e-03, -2.2955e-01,
        -1.1831e-01, -3.8154e-01, -1.4000e-01,  5.7238e-02,  4.5567e-02,
        -3.8784e-03, -6.6481e-02, -2.2377e-02, -1.7244e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-8.7786e+00,  1.0788e+02, -3.2580e+00,  4.9118e-02,  8.5409e-01,
        -1.0631e-01, -2.8309e-01, -6.4670e-01,  3.7073e-01,  1.9579e-01,
         1.6225e-02, -1.8450e-01,  2.3705e-01,  8.1682e-01, -1.4327e+00,
         2.8996e-01,  7.2672e-01, -5.4093e-01,  4.7393e-01,  4.8591e-01,
        -9.7604e-02, -9.0683e-02, -4.8591e-01, -6.7022e-01, -7.1481e-01,
         1.5340e+00, -4.0319e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8740e-01,  1.6354e+01,  7.8442e-02, -1.4776e-01, -1.1729e-01,
         1.0443e-01,  1.8138e-01,  4.5671e-02,  1.0199e-01,  1.1246e-03,
        -8.1050e-04, -1.9516e-02, -6.6877e-03,  8.1817e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2625e-02,  7.4086e+00,  9.1767e-02,  1.1084e-02,  1.0564e-02,
         9.4351e-02,  9.0254e-03, -6.6889e-04,  1.8460e-02, -1.3228e-02,
         2.1180e-02, -1.1409e-03, -1.6835e-02,  2.4170e-02,  9.4983e-03,
        -2.9429e-03, -2.0729e-02,  1.2378e-02,  1.4205e-02,  3.5830e-02,
        -1.1949e-02, -2.7806e-02, -2.7183e-02,  1.0340e-02, -2.5631e-02,
        -1.4347e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7544e-01,  1.7933e+01,  3.0134e-01, -2.5180e-01, -6.6930e-02,
        -4.6960e-01,  6.9643e-02, -1.4583e-02, -6.0999e-02, -1.3918e-02,
        -6.8138e-02,  5.1069e-02, -1.4745e-01, -5.6574e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7478e-02,  6.2981e+00,  2.4798e-02,  2.9945e-02, -5.5612e-02,
         5.0305e-02, -1.2171e-02, -2.2928e-02,  8.1150e-03,  2.6155e-02,
         9.9100e-03, -1.7316e-02, -4.4840e-04, -1.4368e-02,  2.3538e-02,
        -4.5755e-04,  8.1492e-03, -3.3326e-02, -1.7730e-02, -1.2966e-02,
        -1.0139e-02,  9.6252e-03, -5.6775e-03,  6.0666e-03, -6.3536e-03,
        -4.7132e-03,  1.1554e-02, -2.2077e-03,  2.3203e-03,  3.5790e-02,
         6.5457e-02,  1.3744e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9431e+00,  1.0835e+02,  5.4809e-01, -1.4804e+00, -1.4643e+00,
         4.7477e-01, -2.6656e-01, -1.3056e+00,  3.5007e-01, -1.0793e-01,
         5.5905e-02, -1.3799e-01,  2.5524e-01,  3.8860e-01,  1.7025e-01,
         2.3818e-01, -1.4475e-01, -5.1707e-01,  1.0828e-01, -2.7565e-01,
        -1.8316e-01, -2.3793e-01, -9.1446e-02, -4.8227e-01, -1.8228e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1940, 26.4433,  1.0687,  0.6555,  0.0420, -0.1226,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2840e+00,  3.9404e+01,  8.0042e-01, -1.9321e-01,  2.5557e-01,
        -3.3227e-01, -9.5812e-02, -2.0103e-01, -4.9999e-02, -6.4501e-02,
        -1.0632e-01, -9.9879e-02, -9.1561e-02,  4.9115e-01, -2.1366e-01,
         2.4972e-02,  2.0749e-01,  5.4370e-02,  4.2632e-02,  1.4840e-02,
        -2.7403e-01, -1.2491e-01, -2.7269e-02,  3.4108e-02, -1.0043e-01,
         4.8697e-02, -4.8722e-02,  7.3211e-02,  8.0429e-02, -5.5175e-02,
         1.4449e-01, -3.9081e-02,  1.3127e-01, -7.9897e-02,  9.2269e-02,
        -2.5112e-01, -1.4457e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1630e-01, -1.4145e+01,  1.8967e-01, -8.3676e-02,  7.0152e-02,
         4.8351e-03,  3.9778e-02,  2.7381e-02,  2.6169e-02,  3.0095e-02,
         1.5328e-02, -1.3927e-02,  1.1414e-01, -4.6197e-03,  8.3026e-03,
         1.1083e-01,  7.5129e-02, -6.5393e-02, -4.3043e-02, -2.2844e-02,
         4.6265e-01, -4.9313e-02,  9.8717e-02,  5.1776e-03,  8.0244e-03,
         6.6488e-02,  7.3368e-03,  3.8754e-02,  2.5960e-02,  3.7860e-02,
         8.4701e-04, -1.3481e-02, -5.2723e-05, -1.5219e-02,  5.8665e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2073e-01,  1.6960e+01, -5.5213e-01,  6.2841e-01, -2.4691e-02,
         1.9357e-01, -2.9523e-01, -4.7555e-02, -8.9995e-02,  1.9126e-01,
         6.0908e-02, -5.4879e-01, -1.5785e-02, -1.1527e-01,  6.3385e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5005e+00, -1.5106e+02,  1.3361e-01, -1.3337e+00, -9.3235e-02,
         9.6049e-02, -5.9628e-01,  4.5139e-01,  1.6879e-01,  2.6993e-01,
         3.1512e-01,  2.9375e-01, -4.2168e-03,  7.5080e-02,  4.0602e-02,
        -1.3370e-01, -2.2205e-01, -5.9279e-01, -2.3627e-01, -9.2287e-02,
         2.6314e-01,  3.2872e-01,  2.1028e-01, -7.8558e-02, -4.9001e-01,
        -1.2775e-01, -2.6618e-01, -3.2501e-02, -4.7104e-01, -3.1783e-01,
        -4.9546e-01,  4.5125e-01, -1.1156e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6530e+00, -6.7151e+01,  2.8989e-01,  1.0734e-02,  2.1492e-01,
         1.8861e+00,  3.7521e-02, -5.7248e-01,  3.4065e-01, -1.0859e-01,
        -6.6951e-01, -6.6279e-01, -1.8446e-01,  5.6260e-01,  1.4698e-01,
        -8.6220e-03,  3.8869e-02,  1.2314e-01,  1.7839e-01, -3.7264e-01,
         3.8672e-01,  1.3578e-01, -2.6392e-01, -1.2501e-01, -2.0274e-01,
         4.5634e-02, -1.5021e-01,  1.3833e-01,  5.1133e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.9816e+00, -7.8329e+01,  1.4471e-02,  6.8138e-01,  2.9296e-01,
        -1.9778e-01,  3.3714e-01, -6.5522e-01, -4.7905e-01,  2.6037e+00,
         1.8979e-01, -1.1131e-01, -5.5896e-02,  2.0584e-01,  2.6489e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8012e-01,  4.6553e+01, -6.8431e-01,  4.7392e-01, -8.0670e-03,
         9.8441e-02,  1.0591e-01, -1.5960e-02,  2.9625e-01,  1.3753e-01,
        -2.0303e-01,  9.5713e-03, -4.4982e-02,  7.8233e-02,  8.3267e-02,
         9.8162e-02, -8.9126e-02,  2.7326e-01,  5.7538e-02,  1.9111e-01,
        -1.3183e-01, -1.0953e-01, -1.7499e-01,  5.7584e-02, -2.0359e-02,
         8.9952e-02,  6.4669e-02,  1.2240e-01,  5.1744e-03,  9.1438e-02,
         1.7974e-01, -1.4799e-01, -4.3391e-02,  8.2415e-02, -2.7105e-01,
         1.8433e-01, -2.5632e-02, -1.8227e-02,  3.7060e-02, -9.2719e-02,
        -7.3781e-02,  1.7561e-01,  1.0137e-01, -5.9558e-02, -1.3669e-01,
        -2.6318e-01, -6.7862e-02, -3.0035e-01, -3.1430e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9379e+00,  1.6696e+02, -7.6286e+00,  7.0114e-01,  3.1450e+00,
        -1.1401e-01,  4.4139e-01,  9.0061e-01,  8.8316e-01,  3.5654e-01,
         1.2933e-02,  1.6467e-01,  5.6910e-01,  3.3719e-01, -9.4383e-03,
        -1.5857e-01,  8.2823e-01, -9.8412e-02,  7.9060e-01, -1.0131e+00,
         3.0200e-01,  5.5021e-01, -3.2675e-01,  1.3650e-01, -1.1626e-01,
         7.6156e-01, -1.0140e+00,  1.0014e+00,  7.2347e-01, -6.6175e-01,
         4.8573e-01, -8.2965e-01, -1.0870e+00,  4.3023e-01, -5.9741e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0132e+00, -6.6004e+01,  1.8486e-01,  3.2962e-01, -1.1475e-01,
         4.1566e-02,  5.0269e-01, -3.2510e-01, -6.8055e-01,  2.2423e-01,
         1.6764e-01, -1.3053e-01,  8.8603e-02, -1.2058e-01, -3.9580e-01,
         2.3857e-01,  7.3582e-01,  4.2461e-01, -1.8848e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0385e-01, -3.3012e+01,  2.8692e-01,  2.1619e-02,  9.1748e-02,
        -4.1528e-01, -1.8817e-01,  3.1811e-02,  6.0695e-02,  1.7377e-01,
         3.1595e-01,  8.0062e-02, -8.5493e-01,  5.4354e-01,  1.6789e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7615e+00,  5.4718e+01, -2.6379e-02,  3.8405e-01, -6.3353e-01,
        -2.2141e-01,  5.8835e-02, -2.3140e-01, -4.4607e-01, -1.7284e-01,
        -1.6544e-02, -2.9937e-01, -5.5036e-01, -1.5009e-01, -1.5210e-01,
         3.1309e-02,  6.8516e-02, -1.0095e-01, -3.3084e-02,  3.4055e-01,
        -6.4942e-01,  1.8590e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6302e+00,  1.1956e+02, -2.6707e+00, -1.2947e+00,  9.2492e-02,
         2.1444e-01, -8.2494e-01,  4.5941e-01, -1.7849e-01,  1.7331e-01,
        -5.8736e-01,  2.8090e-01,  7.4009e-01,  5.7823e-01, -2.8761e-01,
        -1.8026e-02, -3.2497e-01, -6.0072e-03, -4.1546e-01,  8.8085e-02,
        -1.6303e-01,  5.6586e-01, -1.3000e-01,  7.5766e-01, -5.1766e-01,
         9.1660e-02, -9.5524e-01, -4.7426e-01, -3.2116e-01,  2.3851e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2434e+00,  3.9041e+01, -5.3210e-01, -7.3679e-02,  5.7879e-01,
        -3.7634e-01,  3.9723e-02, -2.8328e-01, -3.1946e-02,  3.9273e-01,
        -3.6519e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2717e+00, -1.5157e+02, -1.7544e+00, -2.3959e+00, -1.7828e+00,
        -1.0933e+00, -5.8267e-01, -5.8941e-02,  3.7138e-02, -3.1522e-01,
         2.1390e-01, -1.1959e+00, -1.0507e-01, -8.3894e-01, -4.9564e-01,
        -1.2201e+00,  3.5436e-01,  3.5688e-01, -2.6249e-01,  3.2967e-01,
        -1.5813e+00,  1.3696e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9069e-01, -1.2800e+02,  1.1501e+00,  1.0914e+00, -5.2940e-01,
         6.4626e-01, -9.1499e-01, -2.1655e-01, -1.4189e-01,  1.8634e-01,
         1.0132e+00, -8.1791e-01,  2.3227e-03,  7.6515e-01,  5.6093e-01,
         1.1966e+00,  8.8442e-02,  1.1104e+00,  1.4808e+00,  6.6511e-01,
        -1.7939e-01, -1.3156e-01,  4.1510e-01,  8.3091e-02,  8.7693e-01,
         4.2565e-01,  1.4082e-01,  2.3576e-01, -4.4647e-01,  8.3498e-01,
         4.5464e-01, -1.6904e-01, -3.8062e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7612e+00,  6.0235e+01,  2.7349e+00, -6.7852e-01,  3.7358e-01,
         4.3590e-01,  7.2336e-02, -6.0704e-01,  1.0492e-01, -4.8028e-02,
        -4.3976e-02,  1.1880e-01,  1.7966e-01, -3.5646e-01,  1.1206e-02,
         7.9859e-01,  2.7063e-01, -2.5602e-02, -3.0810e-01,  2.8793e-01,
         3.7374e-01, -7.1554e-01,  1.0532e-02, -7.8691e-02, -6.9245e-01,
         2.3607e-01,  1.6025e+00, -1.0821e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9278e-01, -1.0921e+01, -3.5056e-01,  2.1971e-02,  7.7003e-04,
        -7.5832e-02, -1.8516e-02,  1.4048e-02,  1.2433e-01, -3.6724e-02,
         9.5184e-02,  9.2887e-03,  9.1730e-03, -2.9761e-02,  2.3392e-02,
         8.5686e-02,  1.7396e-02,  2.9751e-02,  4.1377e-02, -1.3071e-02,
         4.8891e-02,  2.1455e-02,  2.1633e-03,  1.0929e-01, -6.3409e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 3.0035e+00,  1.4876e+02, -5.4495e-01, -3.1325e-01, -1.8677e+00,
         1.5679e+00,  6.9940e-01, -1.0408e+00,  3.1926e-02, -3.4580e-01,
         7.6658e-03, -1.7720e-01, -1.9667e-01,  4.3120e-01, -3.0811e-01,
        -1.7303e+00, -5.5115e-01,  2.4132e+00,  3.7384e-02,  3.7661e-01,
         4.7552e-01,  2.8122e-02, -4.4763e-01,  3.7770e-02, -1.8884e+00,
        -7.1313e-01,  6.6729e-01, -1.9122e-02, -2.9843e-01, -3.9646e-01,
        -4.4176e-01, -5.8240e-01,  8.4121e-02, -4.5595e-01, -6.7003e-03,
         6.4383e-01,  3.1656e-01,  2.4407e-01,  8.7783e-01, -1.2439e+00,
         1.6359e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0486e+00, -1.6091e+02,  1.9153e-01,  3.6886e+00, -1.4540e+00,
         8.2283e-01,  7.1423e-02, -7.8247e-01,  4.3511e-01,  7.1863e-01,
         5.9807e-01,  7.3083e-01, -6.1504e-01, -3.4593e-01, -8.1593e-01,
         8.4296e-01, -3.1388e-01,  3.4307e-01,  1.3515e-01,  1.7479e+00,
         1.0134e+00,  9.9962e-01,  3.0192e-01,  5.5573e-01, -1.3942e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7037e+00,  3.4798e+01,  6.3081e-01, -7.5912e-01, -1.1603e-01,
         1.1991e-01,  2.2553e-01,  1.9889e-01, -2.1657e-01, -5.3867e-02,
         2.8687e-01, -1.1411e-02, -2.2785e-01,  2.8628e-02, -2.1387e-01,
        -1.9901e-01, -3.0079e-01, -4.9804e-04, -3.0720e-01, -1.4237e-01,
        -4.1789e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1468e-02,  1.9587e+00,  3.5617e-02, -2.9281e-02, -1.5326e-03,
         1.5748e-02, -1.0372e-02, -2.0508e-02, -9.7369e-03, -1.5357e-03,
        -1.2093e-02, -6.8127e-02,  8.8385e-03, -5.0655e-03, -7.8164e-03,
        -1.1234e-02,  3.4399e-02, -1.8086e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1648e-01,  3.9818e+00, -8.2722e-02, -1.7138e-02, -1.0054e-02,
         3.1745e-02, -2.0401e-02, -4.6307e-02, -2.4281e-02,  4.7910e-03,
        -1.1453e-02, -4.9222e-03,  6.4605e-03, -1.3396e-03,  6.0826e-03,
         2.1178e-02, -1.4790e-02, -1.8419e-02,  1.1696e-02, -1.6414e-02,
        -7.6083e-03, -4.2484e-03,  6.8844e-03, -3.9852e-03,  9.2092e-03,
        -6.9211e-03,  3.6013e-03,  2.7939e-03, -1.0208e-02, -6.2506e-04,
         1.4644e-02,  2.3508e-03,  2.2903e-03,  3.6440e-02,  4.0296e-03,
         1.1454e-02,  1.0366e-01,  2.7341e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6649e+00, -7.0718e+01,  1.8527e+00, -3.2434e-01, -2.0836e-02,
        -2.2960e-02,  2.9599e-01, -9.3838e-03,  1.8196e-01, -2.1408e-01,
         6.6992e-02,  4.2865e-01,  4.4382e-02, -1.7879e-01, -1.9261e-01,
        -1.2772e-01, -4.1907e-01,  1.5185e+00, -1.8982e+00, -3.7122e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0879e+00,  1.9102e+02, -2.0888e+00,  1.7562e+00,  2.9292e+00,
         3.1463e+00,  1.8322e-02,  4.3426e+00,  7.6224e-01, -8.4438e-01,
        -1.5814e+00, -3.0701e+00,  6.0363e-01,  1.8121e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4081e-02,  2.2485e+01,  1.1239e-01, -3.6357e-02,  1.4708e-01,
         1.4546e-01,  1.6441e-01, -9.8945e-02,  9.5104e-04,  9.7637e-02,
        -2.4190e-01, -1.9193e-03,  9.5193e-03,  8.7394e-02,  7.0054e-02,
        -9.3389e-02, -4.3072e-02,  1.4034e-01,  8.9336e-02,  3.9302e-02,
        -5.0707e-02,  4.7373e-01,  3.7365e-02,  3.5244e-01, -1.3196e-02,
        -7.2641e-02,  6.5678e-02,  4.0974e-02, -4.1048e-02,  2.9268e-01,
         7.1344e-02,  1.0339e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3131e-01, -8.5243e+01, -9.0232e-01, -1.5640e+00, -8.2565e-01,
        -2.2491e-03, -3.1982e-01,  1.5554e-02, -5.1704e-02,  1.3147e-01,
        -7.7264e-02,  9.2321e-01,  3.8314e-01, -9.5456e-02, -1.6777e-01,
         1.7794e-01, -2.9020e-02,  9.8010e-01, -6.7722e-03,  1.9158e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8864e-02,  6.0405e+00, -1.1289e-01, -4.0342e-02, -4.0824e-02,
         5.2175e-02, -1.8851e-02, -1.8413e-02,  1.3050e-02, -1.6606e-02,
        -2.6345e-03, -3.7402e-02, -1.2818e-02,  8.0561e-03, -5.6393e-03,
        -1.1339e-03, -1.3698e-02, -3.1738e-02, -5.4800e-02, -1.4327e-02,
         2.2185e-02,  1.4996e-02,  6.8407e-03,  7.9509e-02, -4.4193e-02,
         1.0559e-02, -6.0512e-02, -1.1131e-02,  3.7085e-02, -1.1896e-03,
         1.8175e-02, -7.1060e-03,  4.4156e-02,  3.7006e-02,  3.1033e-03,
         9.5862e-03,  5.6685e-03, -7.9591e-03, -7.5569e-03, -1.3628e-03,
         2.2655e-02, -1.1531e-02,  8.3845e-04,  6.9224e-05,  2.4198e-02,
        -1.6148e-02,  4.4634e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   4.1109, -108.9284,   -0.2667,    0.5242,   -1.3048,   -0.6882,
           0.2139,   -1.0253,   -2.0366,    0.1530,   -0.5695,   -0.9014,
          -0.1834,    0.9206,   -2.6964,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3979e-02,  1.3081e+01,  6.1146e-02,  1.0822e-01,  1.2194e-01,
        -6.2322e-02, -1.2051e-01,  1.5479e-01, -2.7248e-02, -2.8367e-02,
        -7.5925e-03, -1.4933e-02,  8.7134e-03,  1.7625e-02, -5.8038e-02,
        -9.0624e-02,  3.4962e-02,  3.5824e-02,  1.9903e-01, -4.6101e-02,
         3.0861e-02, -6.7075e-02, -1.5672e-02,  5.4758e-02, -2.3894e-02,
        -1.2647e-01,  3.0914e-02,  5.0760e-01, -1.1741e-01,  1.6570e-02,
        -5.8241e-02, -4.1345e-02, -1.0982e-02, -1.8688e-02,  4.7607e-02,
         2.6913e-02, -4.1509e-03,  1.2837e-02, -1.0745e-01, -1.2671e-02,
        -2.4512e-02, -6.2692e-02, -6.2743e-02, -2.5030e-02, -3.4891e-03,
        -1.0035e-01,  1.0832e-01,  4.7696e-01], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.1192e+00,  1.9202e+02, -2.4164e+00, -3.2913e-01,  4.5208e+00,
         4.8127e+00,  1.5923e+00,  9.8275e-01, -2.2719e+00, -2.2579e+00,
        -7.8775e-01,  1.3673e+00, -1.1415e+00, -3.8417e-01, -2.0812e-01,
        -3.0184e-01, -6.0499e-01, -5.8798e-01, -4.1828e-02,  4.1621e-02,
        -1.3249e-01, -1.5450e+00,  8.4896e-01,  6.7449e-01,  2.2528e-01,
        -2.7316e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7858e-01, -1.3375e+01,  2.6456e-01, -6.2736e-02,  1.6913e-02,
        -4.1798e-02,  9.5167e-02,  2.3943e-02,  3.1096e-03,  5.1706e-02,
        -3.4732e-02,  1.2598e-01,  4.4828e-02, -2.5616e-02,  8.0907e-02,
        -9.6569e-03, -4.7266e-02,  7.7791e-02,  4.6432e-02, -2.1624e-01,
        -4.3691e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7897e-02,  2.8609e+00,  5.7452e-02, -1.4645e-02, -8.1919e-02,
        -1.5002e-02, -8.3489e-03, -2.8197e-03, -3.8976e-03, -4.7206e-05,
        -1.5553e-02, -1.0180e-02, -3.7225e-03, -1.9053e-02,  5.1908e-03,
        -1.1421e-02, -3.3292e-03, -1.1490e-02,  1.7243e-02, -2.4667e-02,
        -8.1805e-03, -5.7849e-03, -2.3052e-02, -2.5084e-02,  5.8099e-02,
         1.5155e-03, -5.0056e-03, -9.0188e-03, -3.0466e-03,  2.4896e-03,
        -3.8137e-03,  5.1363e-03,  1.3181e-02, -1.9511e-03, -7.0150e-03,
        -1.7806e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4376e+00,  6.9023e+01, -1.0989e+00,  1.8184e-01, -3.8503e-01,
         4.5288e-01, -3.9237e-01, -1.1326e-01,  3.5946e-01, -1.9691e-01,
         7.0832e-01, -5.5500e-02,  3.2855e-01, -9.9411e-02, -3.1392e-01,
         5.5509e-01,  2.7111e-01, -6.0043e-01,  8.6806e-01, -8.3604e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1250e-01, -1.0146e+01, -3.1107e-01, -5.9171e-02,  3.6668e-02,
         1.0975e-01, -4.6113e-02,  1.4315e-01, -1.8013e-01, -2.3486e-02,
        -1.2181e-01,  1.2141e-03,  9.4750e-02,  2.6728e-03, -2.5519e-01,
         3.8583e-02,  6.3981e-02, -1.3377e-02,  1.4300e-02, -1.7324e-01,
         2.2924e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1158e+00,  1.5346e+02, -2.5150e+00,  4.9907e-01, -1.2183e-01,
         2.4668e-01,  1.7367e-01, -6.0749e-01, -4.4035e-01,  1.4237e-01,
        -1.6240e-01,  5.5882e+00, -8.6715e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2203e-01,  1.1149e+01,  1.3912e-01, -7.3193e-02,  2.0881e-03,
        -7.9828e-02, -6.6924e-02, -5.1799e-02, -3.6077e-02, -1.0481e-02,
        -4.5084e-02, -6.8330e-02, -2.8003e-02,  3.2001e-02, -4.0079e-02,
        -2.1399e-02,  1.1461e-02, -3.4146e-02, -2.1527e-02, -1.8073e-02,
        -1.3710e-02,  4.7367e-02, -4.7459e-02, -3.1599e-02, -2.4483e-02,
         4.6952e-02, -3.9808e-02, -3.9121e-03,  2.0769e-02, -3.6163e-03,
        -2.1714e-02,  5.8523e-02,  1.7620e-02, -2.3981e-02,  2.7922e-02,
         8.1610e-02,  3.5909e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5985e-01,  2.3037e+01, -1.2654e-01,  1.3955e-01,  1.1293e-01,
        -1.3937e-01, -4.1058e-02,  4.9507e-02,  1.2991e-01, -4.7802e-02,
        -1.6396e-01,  1.7997e-01,  1.4153e-01, -4.4714e-02,  3.8467e-02,
         4.0314e-02,  9.9723e-02,  2.3097e-02, -5.9764e-02,  3.7463e-02,
        -3.0499e-02,  2.2959e-02,  1.6575e-01,  1.1226e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9226e+00,  1.1111e+02,  2.1004e+00, -1.0693e-02,  4.9976e-01,
         4.1507e-01, -4.0014e-01,  6.3735e-01,  2.0778e-01, -1.1092e+00,
        -9.7658e-01, -1.8452e-01,  1.1532e+00, -4.0329e-02,  1.2509e-01,
        -1.4941e+00, -8.8986e-01,  1.2977e-02,  4.8211e-01,  3.4681e-01,
        -2.8698e-01, -6.7169e-01,  6.2089e-03, -7.5649e-01, -2.6236e-01,
        -2.1617e-01, -9.2770e-02, -8.3072e-01, -3.7262e-02, -2.4629e-01,
        -7.1891e-01, -1.6487e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4235e+00,  4.4502e+01, -7.5241e-02, -1.6594e-01, -3.5312e-01,
        -6.2940e-01,  4.4327e-01,  9.9853e-02, -2.9392e-02, -2.0370e-01,
         1.8324e-01, -7.1342e-02, -4.0113e-02,  2.2903e-02, -8.6771e-02,
         1.0444e-01, -2.6515e-02, -5.3583e-02, -9.8629e-02,  1.7125e-01,
         3.2975e-02,  1.4597e-01, -5.5617e-02,  2.5995e-02,  1.2905e-02,
        -4.3362e-01, -3.0963e-01, -8.6337e-02,  7.3315e-02,  2.7302e-01,
         9.5770e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0502e-01, -2.6284e+01, -1.0688e+00,  7.2053e-02, -2.0434e-01,
        -7.0810e-03,  6.6992e-01,  1.3062e+00, -7.9818e-02, -6.7915e-02,
         3.5248e-02, -3.7226e-01, -1.7328e-01,  4.1121e-01, -1.0395e-01,
        -3.6319e-02,  1.5524e-01,  7.8667e-02, -7.9301e-02, -9.1506e-04,
        -2.1396e-01,  6.9386e-03,  7.6612e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4639e+00, -1.3273e+02, -1.4052e+00,  6.8101e-01, -1.8997e-01,
        -1.6697e+00,  3.2544e-01,  1.5205e+00,  6.1576e-01, -1.5194e-01,
        -7.1707e-01,  3.5028e-01,  5.4369e-01,  2.3291e+00,  5.0100e-02,
        -2.9375e-01, -7.4975e-02,  4.0956e-01,  3.0358e-01, -2.0620e-01,
        -2.6849e-01,  6.4748e-03, -5.6446e-01,  4.6613e-02,  2.4346e-01,
        -3.8630e-01,  5.1605e-01,  1.3778e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 5.5575e-02,  1.1254e+02, -3.6795e+00,  2.7758e-01, -8.0968e-01,
        -2.5592e-02,  2.6178e-01, -1.1609e-01,  2.5854e-01, -9.2240e-01,
         2.1766e-01, -3.9192e-01,  3.4203e-01,  3.6920e-01, -3.7431e-01,
         2.6756e-01, -6.6795e-02,  8.5186e-01, -5.7105e-01, -3.1775e-01,
         5.9239e-01,  2.8603e-01, -7.6564e-01, -3.4769e-02,  6.3139e-01,
         4.4298e-01,  7.2082e-01, -2.5716e-01,  3.4227e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0126e+00,  5.5154e+01, -3.5999e-01,  8.6708e-01,  1.0484e+00,
         1.8396e-01,  3.2439e-01, -3.8487e-01, -6.9038e-02,  3.7561e-01,
         3.9617e-01, -3.6809e-01, -1.4583e-03, -6.7155e-02, -7.0592e-02,
        -3.3296e-01,  3.5364e-02, -4.9955e-01,  1.2068e-01,  5.3586e-02,
        -7.3479e-02, -7.9799e-01, -1.0757e+00,  4.3864e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1634e+00,  1.3985e+02, -3.2012e-01,  8.2250e-01,  1.1912e+00,
         1.3399e+00, -4.5195e-02,  8.5308e-01,  1.6017e+00,  2.7788e-01,
         1.9520e-01,  7.9033e-01,  1.5367e+00,  2.5300e-01,  2.7182e-01,
         7.0778e-01, -6.9486e-01,  5.1190e-01,  4.1240e-01, -9.9978e-03,
        -4.1658e-01,  2.6058e-02,  1.8287e-01,  9.0594e-01,  4.4770e-01,
        -2.2713e-01, -1.1144e+00,  3.1077e-01, -5.1523e-01,  6.2085e-01,
        -4.3854e-01, -1.8273e-01, -7.8875e-01,  3.9030e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6062e+00,  2.1576e+02, -4.8613e+00, -4.1538e+00, -9.6594e-02,
        -1.3196e+00,  4.8332e-01,  1.1527e+00,  3.9922e-01, -6.3754e-01,
        -9.0357e-01, -5.1815e-01, -9.0668e-01, -1.9593e-01,  5.2983e-01,
        -8.4370e-01, -1.8626e+00,  1.4668e+00, -2.4846e-01,  3.6004e-01,
         6.1731e-01, -1.2862e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8710e-01,  2.4441e+01, -3.0293e-01,  4.0335e-01, -6.7886e-02,
        -1.2669e-01, -1.2216e-01,  3.0223e-01,  6.1222e-02, -9.9752e-02,
        -3.9804e-02, -8.0509e-02, -1.1305e-01, -1.5187e-01, -1.5417e-02,
         2.1369e-01,  1.5025e-01,  6.3017e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6688e+00,  1.8448e+02, -5.4026e+00, -8.4496e-01,  8.4310e-01,
        -1.1719e+00, -7.1609e-01,  1.0217e+00, -7.8380e-01,  1.2399e+00,
         7.4355e-02, -7.5508e-02, -9.1937e-01, -3.2793e-01,  7.5585e-01,
         2.5431e-01, -1.1830e+00, -6.9323e-01, -1.5710e+00, -6.8400e-01,
         3.8719e-01, -2.0626e+00, -5.0559e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  4.2001, 213.2077,   5.4405,   0.4425,  -3.9663,   0.3883,   0.3572,
         -1.4301,  -2.2763,  -0.4814,  -0.4518,   0.2690,  -0.5493,   2.2181,
         -0.3643,  -1.4722,   3.8598,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  -4.7302, -123.1187,    0.1750,   -1.5580,    0.2002,   -0.7426,
           0.3444,    0.9716,    1.2036,    1.5460,   -1.3873,    0.4937,
          -0.1479,   -0.3133,    0.4228,   -1.4317,    2.5179,   -0.4192,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9157e-01,  3.4216e+01, -1.1900e+00,  1.4811e-01, -1.9559e-02,
         1.1812e-01, -1.9013e-01,  1.8206e-01, -1.1852e-01,  8.1211e-01,
         1.4037e-01,  3.8387e-01,  2.6931e-01, -1.7285e-01, -2.6637e-01,
         2.9009e-01, -2.3878e-01,  1.8215e-01,  9.4245e-02, -1.4637e-01,
        -1.1737e-01, -1.3830e-01,  1.3229e-01,  1.6869e-02,  9.8389e-02,
        -7.0841e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0724e-02,  1.9972e+01,  1.9757e-01,  1.3112e-02, -3.6761e-02,
        -1.5551e-01, -8.5357e-02, -6.5127e-02, -1.4066e-02,  6.3517e-02,
         6.9659e-02, -1.0010e-01,  5.8411e-02, -1.3365e-01, -1.4377e-03,
         1.0535e-01,  8.7115e-02,  8.2900e-03,  6.2810e-02, -3.4805e-02,
         9.2440e-02, -1.0137e-02,  4.6678e-02,  1.0505e-01,  1.0556e-01,
         1.8264e-01, -6.8019e-02,  5.1119e-02,  2.4175e-02,  1.9998e-01,
         1.0247e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4703e-01,  7.0264e+00, -7.9630e-02, -4.8038e-02, -6.2723e-02,
         7.3575e-02,  2.1721e-02,  9.9286e-02, -9.4670e-03, -7.3789e-02,
        -1.1429e-02, -2.1591e-02,  1.3161e-02, -1.6168e-02,  2.5733e-03,
        -4.7274e-03, -5.3375e-02,  2.1492e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0050,  0.2981,  0.0017,  0.0043,  0.0058, -0.0078,  0.0038,  0.0050,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.1555e-01,  5.3374e+00, -2.5738e-02, -3.6865e-02, -6.3334e-02,
         1.7481e-03, -1.7598e-02, -2.4481e-04, -1.6202e-02, -5.3668e-03,
        -1.6881e-03,  1.8011e-02,  1.2162e-02,  4.8134e-03,  4.9182e-03,
        -7.0911e-03,  9.9679e-03, -6.6691e-03,  1.4192e-02,  2.3173e-02,
         9.5570e-03,  2.3910e-03,  1.2200e-02, -6.4069e-03, -7.8588e-03,
        -2.8789e-04, -1.5470e-02,  6.1344e-03,  1.8708e-02, -4.8783e-03,
         2.4221e-02,  1.2649e-02,  5.6207e-03,  9.0885e-03, -7.0167e-03,
        -5.2103e-03, -9.3916e-03,  2.4322e-02,  3.7550e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3352e-01, -1.2712e+02,  3.7197e+00,  3.1261e-01,  1.4849e-01,
         2.8790e-02,  5.1028e-02,  1.1059e-01,  8.9008e-01,  4.7057e-01,
         2.5592e-01,  3.2293e-01, -3.4135e-02,  5.2804e-01, -4.3649e-01,
         6.7233e-01, -8.0147e-01,  1.6594e+00,  3.3337e-02,  6.9851e-02,
         1.1859e+00,  6.4189e-01, -2.5515e-01,  3.3246e-03, -9.4355e-02,
        -6.3842e-02,  2.2616e+00, -1.3088e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.7344, -12.7214,  -0.0293,  -0.4713,  -0.0442,  -0.0516,   0.1530,
         -0.0527,  -0.0275,   0.1508,  -0.0992,  -0.0185,  -0.2158,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5985e+00,  1.5188e+02, -3.3732e+00, -1.2484e+00, -3.0417e-01,
        -2.8148e-01, -4.7003e-01,  3.0620e-01, -9.7102e-02,  4.3265e-01,
        -7.4208e-01, -3.2352e-02, -4.7720e-01,  1.4657e-01, -2.1101e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4949e-02, -1.9110e+01,  2.4859e-01, -4.3138e-02, -1.4258e-01,
         1.4447e-01,  6.9249e-02,  1.2008e-02, -2.0153e-01, -5.6649e-02,
        -1.0296e-02,  2.5802e-03,  8.9178e-02, -2.2808e-01, -7.3993e-02,
         4.7718e-02, -2.1223e-02,  8.2213e-02,  6.1771e-02,  5.7741e-02,
         2.8528e-02,  1.0896e-02, -6.0075e-02,  3.3282e-02, -7.0388e-02,
        -9.8053e-02, -4.7497e-02, -9.1611e-02,  4.6978e-02, -5.4280e-02,
         1.6191e-02, -4.0172e-03,  6.8168e-02, -6.7134e-02, -7.6900e-02,
        -1.4275e-02,  4.6895e-02,  5.7859e-03,  4.8218e-02, -2.4349e-02,
        -6.8005e-02,  8.8434e-02, -9.4633e-02,  4.5995e-03, -4.7274e-02,
         1.3995e-02,  1.5007e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0324e-01,  1.9085e+02, -1.2711e+00,  1.7954e+00, -2.4767e+00,
        -1.2913e-01, -8.3329e-01, -1.1224e+00, -9.7737e-01,  5.7536e-01,
         5.9068e-01,  1.5461e-01,  4.9478e-01,  1.3358e-01,  9.7556e-02,
         1.8778e-01,  7.2442e-01,  1.0680e+00, -1.8111e-01, -4.3309e-01,
         7.9434e-02, -2.4659e-01,  2.8378e-01, -1.0318e-01,  5.2993e-02,
         2.1642e-01,  2.1711e+00,  2.8602e-01, -1.8392e-01,  9.0946e-01,
        -2.0063e-01, -2.1621e-01,  2.5667e-01,  1.1866e+00, -1.2774e-01,
        -6.9663e-02,  3.4237e-01, -1.7559e-01,  5.5346e-01,  4.7937e-01,
         3.1828e-01, -4.0790e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6824e-01, -1.5491e+02,  5.5948e-01, -1.5595e-01,  7.9077e-01,
        -2.0179e-01, -4.7692e+00,  7.9897e-01, -7.9648e-01, -6.6785e-01,
         7.9955e-01,  2.4017e+00,  7.2326e-01,  3.8126e-02, -5.2594e-01,
        -1.1354e-02, -1.9883e+00,  4.4779e-01, -1.3591e-01, -6.4923e-02,
         1.0970e-02, -1.5980e-01,  1.2599e-01,  8.0327e-01, -8.0597e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2282e-02,  1.5055e+00, -7.9479e-03, -7.1715e-04, -2.3660e-03,
         4.4794e-03,  4.6213e-04, -1.6098e-03,  2.4191e-03,  1.2641e-03,
         2.5218e-03, -5.4574e-03, -3.9867e-03, -1.7707e-02, -3.7778e-03,
         4.4320e-03,  3.4205e-03,  2.8946e-03, -1.3281e-03,  9.2262e-05,
         4.7786e-04,  5.6893e-03,  2.8069e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9891e-02, -3.0111e+01,  1.8181e-01, -1.0153e-01,  2.3065e-01,
         4.3394e-02, -2.0204e-01, -3.2002e-01,  1.4724e-01, -3.2388e-01,
         1.7814e-01,  1.5928e-02,  6.6075e-03,  2.0639e-03,  1.0787e-01,
         3.9417e-02, -5.1346e-02,  1.8844e-01,  8.5175e-02,  1.8224e-01,
         9.6222e-02,  8.9863e-02,  2.2688e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0955,  0.9734,  0.0105, -0.0218, -0.0098,  0.0041,  0.0039,  0.0033,
         0.0205,  0.0011,  0.0061, -0.0058,  0.0057,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4034e+01,  1.9626e+02,  2.6115e+00,  5.4117e-01, -4.4847e-01,
        -1.3549e+00, -1.2394e+00, -7.5569e-01, -1.3294e-01, -3.7647e-01,
        -1.7145e+00, -4.8249e+00, -1.1298e+00,  1.4600e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6979e+00,  1.4024e+02, -2.7200e-01, -8.5074e-01, -1.5824e-01,
        -4.0191e-01,  3.9375e-01, -9.7290e-01,  7.2459e-01,  1.1744e-01,
        -1.9650e+00, -3.7990e-01, -1.0311e-01,  1.1854e-01,  6.1002e-02,
        -6.7141e-02, -6.7981e-01, -2.1837e+00,  1.7840e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.6236e-01,  6.7150e+01,  2.8644e-01,  3.5921e-01,  6.5550e-02,
         6.8079e-02,  5.0191e-01, -5.7129e-02,  2.2709e-01,  4.7136e-01,
         1.8104e-01, -1.0156e+00,  1.3257e-01,  7.1989e-01,  1.4880e-01,
         2.1082e-01,  5.0628e-01,  3.8044e-03,  4.6776e-01,  4.2943e-01,
         1.7561e-01,  4.2386e-01, -1.6345e-01,  5.9994e-02,  4.8111e-02,
         8.0440e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3644e-02,  5.5677e-01,  9.5840e-05,  6.6509e-03,  1.6475e-03,
         2.7201e-03, -1.3548e-03, -7.8953e-03, -6.0778e-03,  5.4331e-03,
         4.1975e-03, -4.3414e-03,  2.2950e-04, -1.9588e-03,  1.2649e-02,
        -4.0826e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5901e-02,  1.0616e+01, -1.3793e-01,  7.6106e-01,  8.6139e-02,
         2.8058e-01, -9.2756e-03,  2.5710e-02, -3.3093e-02,  5.4013e-02,
         6.7533e-02,  9.8281e-03,  1.0956e-02, -7.7687e-03,  1.3294e-01,
         1.4608e-01, -2.3393e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1660,  7.6232, -0.2105, -0.0243, -0.1288,  0.0663, -0.1077,  0.0401,
         0.0241,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5275e-01,  8.5197e+01, -4.9934e-01, -1.9938e-01, -1.7690e+00,
         2.9989e-01, -9.8817e-02,  9.5194e-01, -4.4627e-01, -2.3632e-01,
         2.4387e-01,  6.5168e-01,  1.4024e-01, -3.2987e-01,  8.3514e-01,
         3.9681e-01,  3.6463e-01,  6.2814e-01,  8.6227e-02,  5.5741e-01,
         2.9575e-01,  2.5873e-01, -2.5578e-01, -4.3001e-01,  2.9110e-01,
        -1.7367e-01, -3.4650e-01, -1.5542e-01,  3.2299e-01, -2.2224e-02,
         3.7019e-01,  9.9033e-02,  2.4068e-01, -2.8344e-01,  1.8338e-01,
        -2.0350e-01, -1.0427e-01,  1.1591e-01, -3.1790e-02, -1.8530e-01,
         1.6117e-01,  1.0437e-01, -2.3951e-01, -5.8042e-02,  2.3349e-01,
        -4.0237e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6582e-02,  5.5168e+00, -6.5606e-02, -4.5601e-02, -2.0482e-02,
        -5.7870e-03,  9.8564e-05,  2.7510e-02, -4.1995e-03, -2.1691e-02,
        -7.8832e-03, -1.7585e-02,  3.2066e-02,  9.9281e-04,  1.5104e-02,
         2.6203e-02,  2.7376e-02, -1.7361e-02,  5.7053e-03,  7.7135e-03,
         9.1070e-03,  2.4725e-04, -3.1572e-02,  1.2650e-02,  1.6170e-02,
        -1.6350e-02,  2.6913e-02,  3.2925e-03,  2.6799e-02, -6.6065e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.8585, 103.9712,  -1.0272,  -0.9773,  -0.1740,   0.8053,  -1.7654,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.9097, -43.5923,   0.7853,   0.0847,   0.0900,   0.0926,  -0.2992,
         -0.5395,  -0.4969,   0.6008,   0.0960,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6752e-01,  8.6283e+01, -9.9955e-03,  1.6870e+00,  4.9946e-01,
         7.5719e-01,  5.4959e-01,  7.2537e-01,  7.9327e-01,  2.2271e-01,
         2.5347e-02,  1.3461e+00, -3.1728e-01, -8.0476e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3855e-01,  8.9171e+01,  9.5154e-01, -1.7567e-01,  6.5621e-01,
        -8.5007e-02,  6.5893e-01, -3.6631e-01,  1.1806e-01, -5.9366e-01,
         1.5152e-01, -1.3665e-02, -1.6812e-01,  1.7274e-01, -1.2874e-01,
         1.1092e-01,  2.7787e-01, -5.6098e-03, -1.4450e-01,  3.5924e-01,
         5.2007e-01,  4.4678e-01,  1.8908e-01, -6.0519e-02, -1.5291e-01,
        -2.0425e-01, -3.8497e-01,  2.8602e-01,  7.8075e-02,  1.4395e-01,
         4.2390e-01, -7.4266e-02, -2.5942e-01,  9.9393e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1591e-01,  2.0867e+01,  2.6974e-01, -3.3210e-02, -3.6584e-02,
        -2.7754e-02,  5.9815e-03, -1.7286e-01,  3.5342e-02, -4.6076e-02,
         7.8925e-02, -4.8138e-02, -2.2377e-02, -8.5678e-02, -4.8758e-01,
        -2.9306e-02, -3.8094e-01,  1.1501e-03, -4.2058e-02, -1.2859e-02,
         2.6533e-02,  4.6842e-02,  3.7687e-01,  3.7316e-02, -3.0910e-02,
        -7.9586e-02,  3.5240e-02, -1.2995e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2486e+00,  2.2765e+01,  5.2992e-01,  2.2020e-01,  1.8443e-01,
        -2.3001e-02, -1.2815e-01,  8.0041e-02, -5.4971e-02, -1.1464e-02,
         6.6481e-02, -1.6843e-01,  1.7307e-01,  2.3971e-01, -2.0504e-01,
        -1.4577e-01,  3.4287e-01, -2.9625e-01,  3.8588e-02,  5.2909e-02,
         2.5514e-02,  4.4396e-02,  3.2579e-01, -2.4894e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 5.1289e-02,  4.9476e+00,  1.0232e-01,  1.0468e-01, -1.1271e-03,
         1.0456e-02,  3.7186e-02,  1.4893e-02,  6.6540e-02, -1.7278e-02,
         1.7357e-02,  2.3198e-03,  2.9611e-02,  8.0492e-03,  1.7163e-02,
        -1.4236e-02, -2.1613e-02,  2.3050e-03,  2.4296e-02,  1.3283e-01,
         1.2737e-02, -4.1196e-02, -2.9525e-02,  7.7549e-03,  2.5492e-02,
         6.1234e-02, -5.9265e-03, -2.6206e-04,  3.5310e-02, -6.1793e-02,
        -5.2819e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4205, 10.0620, -0.1272,  0.0356,  0.0590,  0.3973,  0.1247, -0.0139,
         0.0274, -0.0302, -0.3620, -0.0323,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 10.4489, 167.6137,  -1.5680,  -2.2839,   0.3726,  -0.7654,  -1.6364,
         -0.8059,  -0.2106,  -0.8477,  -3.3683,   0.2571,   0.6408,   1.9369,
          2.5342,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4360e+00,  1.3511e+02,  2.1763e+00, -2.2448e-01, -8.1956e-01,
         1.6501e-02, -3.4141e-01, -3.7381e-01, -5.5024e-01,  4.9697e-01,
         2.7982e-01,  5.2568e-01, -8.5551e-02, -4.1064e-01, -3.3332e-01,
        -2.6742e-01, -2.7197e-02, -1.0160e+00,  1.7790e-01,  4.2306e-01,
         2.0127e-01,  8.0003e-02, -2.5233e-01,  1.1511e-01, -3.9439e-01,
        -2.2735e-02,  1.8430e-01, -1.8354e-01, -2.4576e-01, -1.6859e-02,
         1.0440e-02,  1.5371e-01,  1.3546e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0964e+01,  1.8261e+02,  7.7420e-02, -3.1056e+00,  2.2828e-01,
        -1.9626e-01, -2.8648e+00, -1.3241e+00, -1.0736e+00,  1.3396e-01,
        -1.8361e+00,  2.9001e-02, -2.7950e-01,  1.1726e+00, -8.2100e-01,
        -1.7817e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1484,  1.9614,  0.0144,  0.0261, -0.0062, -0.0283,  0.0072,  0.0044,
         0.0238,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0684,  3.6929, -0.3045,  0.0401, -0.2666,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5819e-01,  2.9109e+01, -4.7153e-01,  1.4308e-01,  7.3072e-02,
        -4.0080e-01, -1.0128e-01,  9.0948e-02,  1.6132e-01, -1.2600e-01,
         2.1706e-01, -4.7852e-02, -1.9576e-01, -3.8200e-02, -7.4395e-03,
        -1.3255e-01,  1.2241e-01,  7.5201e-02, -1.2038e-01, -1.3170e-01,
        -9.3309e-03, -6.1159e-02,  1.3032e-01, -1.5638e-01,  5.2171e-02,
        -1.5925e-01, -2.8370e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2358, -2.1290,  0.0062,  0.0046, -0.0569, -0.0814, -0.0892,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6056e-01,  7.9296e+00,  5.8363e-02,  3.7675e-02,  3.2691e-03,
         2.1409e-02,  1.2716e-01,  2.9668e-02,  2.0808e-02, -1.1484e-01,
        -3.1474e-02, -2.5311e-02,  4.5131e-02,  9.9175e-02, -1.3819e-02,
         1.2168e-02,  4.2536e-02, -5.1674e-03, -4.3459e-02,  2.7731e-03,
         5.4672e-02, -2.0740e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1470e-01, -4.2875e+01,  2.2421e-01, -3.9036e-01,  9.8592e-01,
        -9.5246e-02,  1.4577e-02,  8.0234e-02,  1.8973e-01,  9.8850e-02,
         1.3539e-01, -2.8770e-01,  1.1049e+00, -2.3925e-01,  2.7642e-02,
        -1.0416e-01,  2.7308e-02,  2.0908e-01, -1.2889e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3862e-02,  1.5354e+01,  4.2985e-01,  6.4478e-03, -1.8981e-02,
         1.2458e-01, -3.6393e-02, -6.1109e-02,  8.8324e-02, -4.8041e-02,
         3.6429e-02,  1.9579e-02, -1.0858e-02,  6.8165e-02,  2.8505e-02,
        -7.4873e-03,  4.0408e-02,  5.2088e-02, -5.3507e-03,  1.7532e-02,
         8.8619e-02,  2.9430e-02,  1.0743e-03,  1.0678e-02,  2.1484e-02,
         1.0802e-02, -5.1231e-02, -2.1272e-02, -5.0096e-02, -8.0480e-02,
        -2.9449e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-6.2235e+00, -1.3852e+02, -6.3091e+00, -7.8672e-01, -1.6481e-01,
         6.7276e-01, -5.8799e-01,  1.2330e-01,  6.5206e-01,  1.6459e-02,
         4.1512e-01,  4.1571e-01,  3.2848e+00, -1.1182e+00, -4.9931e-01,
         8.1820e-01,  3.3120e-01, -6.0314e-01,  2.5090e-01,  3.5852e-01,
         1.8931e-01,  4.9978e-01,  2.2266e-01, -1.0687e+00, -2.3358e-01,
         3.9844e-01,  1.0113e-01,  1.9025e-01, -4.7577e-01, -9.1993e-01,
        -2.6720e-02, -9.6066e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9456e+00, -1.3083e+02,  1.9304e+00, -6.3299e-03,  1.3825e-01,
        -2.4118e-01, -9.0010e-02, -3.9493e-01, -2.2675e+00,  4.4234e-02,
         5.6818e-01, -9.1576e-01,  3.4379e-01, -2.4423e-01, -8.3110e-01,
        -1.0142e+00, -5.4513e-01,  2.9367e-01,  5.1832e-01, -3.5037e-02,
         1.0007e-01, -7.3240e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2745e-02,  4.8967e+00, -5.7031e-03, -4.3067e-02,  3.9090e-03,
         2.1825e-02,  1.9683e-03, -9.8737e-03,  6.8476e-03,  6.0582e-03,
         5.8396e-04,  1.0335e-03,  2.9610e-03,  4.7865e-03, -6.4311e-02,
         1.1778e-02, -2.8251e-02,  5.7263e-02,  2.5643e-03,  2.1670e-02,
        -6.0667e-03,  5.1753e-03,  3.5331e-03, -5.2230e-03,  5.4177e-03,
         3.1921e-02,  4.6650e-03, -1.8419e-02, -3.1362e-03,  3.0543e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1951e-02,  2.7230e+00, -3.0865e-02, -1.4798e-02, -3.5359e-03,
         4.3198e-04, -3.9435e-03,  4.3598e-03, -1.2149e-02,  4.4562e-03,
        -3.9582e-03,  2.3884e-02, -8.9781e-03,  1.6657e-02, -6.8047e-03,
         4.4980e-03,  4.5798e-03,  9.3742e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0363e-01,  1.8349e+01, -8.8989e-02,  8.5161e-02, -1.6464e-01,
        -1.0146e-01, -1.6273e-02,  1.1644e-02,  2.6303e-03,  1.0339e-02,
         5.7379e-03,  1.3056e-02,  4.6009e-03, -3.7681e-02, -2.2304e-02,
        -1.8738e-01,  5.9805e-02, -1.7495e-01,  1.4346e-02, -3.3477e-02,
        -3.2245e-02,  1.1275e-01,  7.6421e-02,  4.2549e-02, -2.4285e-02,
        -6.2909e-03, -1.1080e-01,  3.3087e-03, -1.4090e-03, -1.1209e-02,
        -2.0261e-02, -6.7547e-02, -3.3666e-02, -2.9589e-02,  3.3593e-02,
         8.5696e-02,  1.4976e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7379e-01,  6.9418e+00,  3.5712e-02,  9.4249e-02, -4.8815e-02,
         3.2381e-02,  2.9257e-03, -5.0032e-03, -1.0738e-03,  3.4149e-03,
         6.5117e-03,  5.2967e-02,  3.4537e-02,  1.1538e-02,  5.1048e-02,
         2.1744e-03, -1.4387e-03,  1.1906e-02, -5.2011e-03,  2.0154e-02,
         3.5300e-02, -1.9078e-03,  1.5615e-02,  3.7180e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2118e+00,  6.0333e+01,  2.9240e-02, -7.8171e-01, -9.4255e-02,
         1.7861e-01, -3.1147e-01,  2.0415e-01, -2.5619e-03, -1.4400e-01,
         9.1833e-02, -1.9549e-01, -3.0586e-01, -2.9881e-02,  1.0220e-01,
         1.8136e-02, -2.4364e-01,  1.3687e-02,  5.8116e-02,  1.3242e-01,
         2.8799e-02, -4.7200e-01,  3.1788e-02,  5.1593e-03,  2.6331e-01,
         5.6902e-01,  8.6427e-02, -1.9960e-02, -1.9916e-02,  1.1950e-01,
         1.6407e-01,  8.5967e-02,  1.4669e-01,  2.1173e-01,  1.9310e-01,
         2.2056e-01, -1.4441e-01, -1.4189e-01,  1.0545e-01,  1.5382e-01,
         1.9391e-01, -1.4613e-01, -2.8390e-02,  3.6316e-02,  8.6759e-02,
         2.2535e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3751e-01,  1.7796e+01,  7.0195e-03, -1.1127e-01, -1.6659e-01,
         1.5226e-01, -3.0013e-01,  5.8325e-03,  6.5738e-02, -5.9146e-02,
         3.6337e-02,  1.0429e-01, -8.1330e-02,  2.7019e-02,  4.6697e-02,
         1.0336e-01,  1.8583e-01,  3.6897e-01,  3.2213e-04,  1.1079e-01,
         1.4564e-01,  1.1171e-01,  9.2315e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4825e+00, -8.3808e+01,  1.6213e+00,  1.2025e-01,  1.8917e-01,
        -2.4683e-01, -1.0866e-01,  7.9058e-02, -1.6088e+00,  5.9494e-01,
        -2.1882e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4659e-01,  4.5851e+01,  3.7279e-01, -2.5124e-01,  2.5818e-02,
         4.8245e-02, -2.2635e-02, -2.4418e-01,  2.0339e-01,  4.4649e-01,
        -7.3879e-02, -9.0146e-02,  3.3722e-01,  4.0709e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5727e-03,  3.5880e+01, -3.8231e-02, -5.9339e-01, -4.1343e-01,
         1.5837e-01, -1.2622e-01, -2.9589e-02,  7.5208e-02, -2.3313e-01,
        -1.0759e-01,  2.1812e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2347e+00, -7.0262e+01, -2.8459e-03, -2.3545e-01,  2.3007e-01,
         3.3764e-01,  2.8943e-01, -5.4345e-01, -4.3790e-01, -4.3313e-01,
         3.4497e-02,  3.7298e-02,  1.4436e-01, -2.5223e-01, -4.0732e-03,
         4.0882e-03, -1.0038e-01, -1.4154e-02, -1.3419e-01, -1.4714e-02,
         9.4338e-02, -9.1425e-02, -3.6691e-01, -6.7975e-02,  1.4015e-01,
         3.6448e-01, -3.8371e-01, -8.9971e-03,  1.3794e-01, -2.8175e-01,
        -5.3932e-02, -8.6383e-01,  3.1285e-02, -6.0005e-02,  7.0354e-03,
         3.2684e-01, -7.4900e-02,  3.7570e-02,  1.3126e-01,  9.4769e-02,
        -1.2360e-01, -1.8139e-01,  1.1035e-02, -4.6432e-02, -7.7943e-02,
         3.0640e-02, -7.5939e-02,  7.9190e-03,  9.5508e-02, -5.0098e-03,
        -4.2498e-02, -1.0294e-02,  1.1457e-01, -1.2410e-01, -1.0104e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.1260e+00,  5.0633e+01, -1.6739e+00,  4.4244e-02,  2.5483e-01,
        -4.7340e-03,  1.1162e-01,  2.0635e-02,  1.2854e-01,  5.0733e-02,
        -8.5232e-01, -2.5739e-02,  3.8245e-01, -2.2229e-01,  3.1220e-01,
         1.4690e-01, -1.6490e-02,  8.4816e-01, -3.7260e-02,  1.2414e-01,
        -3.4159e-01, -1.1105e-01, -3.1767e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5718, 19.0688,  0.1896, -0.1422,  0.2115,  0.1331, -0.2106, -0.0482,
        -0.0601, -0.0589, -0.2532,  0.0206, -0.2125,  0.0561,  0.0508, -0.1305,
        -0.0437,  0.6069,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1759,  2.9762,  0.0273,  0.0202,  0.0491,  0.0170,  0.0413,  0.0114,
         0.0210, -0.0272,  0.0075,  0.0203,  0.0366, -0.0289, -0.0083, -0.0077,
         0.0223,  0.0088,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4093e-02,  5.9743e+00,  7.4170e-03,  4.5790e-03, -5.1477e-02,
         2.2002e-02,  1.5881e-02, -3.9813e-02,  4.5094e-02,  2.2207e-02,
        -3.3515e-03,  6.9664e-02,  6.0601e-02,  8.6215e-02, -1.5296e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6914e-03,  8.9195e-01,  5.9010e-03,  1.6161e-03,  5.4237e-05,
        -2.5501e-03,  5.4544e-04,  1.2295e-03,  1.0089e-02,  1.3249e-03,
        -2.8261e-03, -1.5434e-03,  2.1402e-03,  1.9190e-03, -1.4372e-03,
         3.1870e-03,  1.1869e-03,  2.8707e-03, -1.7954e-03,  1.0359e-02,
         1.1559e-02,  2.2884e-03, -1.2834e-04,  5.4833e-04,  1.3867e-02,
        -2.1764e-03,  1.6455e-02, -1.6578e-04, -7.8603e-04,  9.1404e-03,
        -7.0600e-03,  4.0867e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8134e+00,  4.6155e+01, -8.8761e-02, -3.5216e-02,  1.9862e-01,
         8.7590e-02, -1.5640e-01, -1.3337e-01, -8.2092e-01,  1.9434e-01,
         7.4340e-02, -9.6218e-02, -1.9124e-01, -2.7497e-01,  9.0499e-03,
         2.6613e-02, -9.1956e-02, -6.0147e-01, -1.5321e-01, -4.0412e-02,
        -1.5626e-01, -1.6815e-01, -6.4376e-01,  1.0507e-01,  8.3893e-02,
         1.9811e-01, -1.8176e-02,  6.9036e-02,  4.1321e-02,  2.3387e-01,
         4.8059e-01, -3.9265e-02,  1.9836e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8843e-01,  3.9868e+01,  4.7051e-01, -1.6893e-01,  7.3182e-02,
         1.6275e-01,  2.4199e-02, -8.1196e-02, -1.7754e-02,  2.6173e-02,
        -3.6143e-02, -1.6194e-01, -1.8099e-01, -1.1597e-01, -2.5512e-01,
         4.9249e-02,  6.7634e-02, -1.3166e-01, -8.0637e-02, -2.4906e-02,
        -3.3905e-02,  5.8675e-02, -5.9869e-02,  7.9681e-02,  2.7981e-02,
        -2.9023e-02, -5.2664e-02,  8.5352e-02,  2.1149e-02, -1.4649e-01,
         4.4491e-02,  2.1957e-02,  3.7930e-02, -1.0318e-01, -3.1633e-02,
         4.7966e-02, -3.1659e-02, -7.0817e-02,  3.9637e-02,  3.9197e-02,
         9.0436e-02, -5.6985e-03,  9.5143e-03,  8.0729e-03,  8.1103e-02,
         1.2239e-01, -1.9222e-02, -3.3093e-03,  7.1418e-02,  9.4164e-02,
         8.8310e-02,  8.8590e-02, -1.1834e-02, -2.7564e-02,  8.2839e-02,
         3.1935e-02, -1.1088e-01,  2.1927e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3307e-01,  1.7949e+01, -4.7798e-01, -2.3490e-01,  9.0854e-03,
        -5.2756e-02,  1.5228e-02,  2.8952e-02, -1.3848e-01,  6.4649e-02,
         7.6766e-02, -5.5119e-02, -1.3470e-01, -1.8878e-01, -3.8040e-03,
        -2.7765e-02,  1.3667e-01,  6.0069e-03, -4.0361e-02,  4.4297e-02,
         2.6325e-02, -2.8737e-03, -8.6826e-02,  1.1284e-01, -2.1153e-02,
        -3.0742e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4992e-02,  1.3814e+00, -1.3216e-02, -6.2133e-03, -1.1868e-02,
         5.9049e-03,  4.6887e-03,  1.6425e-04,  2.3374e-03, -3.2858e-03,
         1.9560e-02, -3.0921e-04,  9.8100e-03,  6.1362e-03,  2.2114e-03,
        -3.2999e-03,  2.5136e-03,  1.2077e-03,  1.0542e-02, -1.8511e-03,
        -2.8062e-03, -8.4664e-03, -5.6663e-03, -5.9745e-04, -6.1608e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9875e+00,  1.3974e+02,  5.9257e-01, -1.5363e+00,  7.2365e-01,
        -2.5565e-01,  8.5084e-01, -4.3770e-02,  2.6528e-02,  1.0597e+00,
        -7.8137e-03,  1.1237e-01, -6.1473e-02,  5.1943e-01, -1.6689e-01,
         1.2515e-01, -2.0526e-01,  7.8950e-02,  1.5008e-01,  8.0206e-01,
        -2.6297e-01,  9.5361e-01, -5.5676e-01, -3.0510e-01, -3.0565e-01,
        -8.7905e-01,  1.1310e+00,  6.6047e-02, -4.7590e-01,  1.4102e-01,
        -3.6782e-01, -6.6000e-01, -1.7123e-01,  1.3328e-01, -3.2433e-01,
        -3.8734e-01,  1.3575e-01, -7.0256e-01, -5.6167e-01, -1.9685e-01,
        -4.7282e-01,  1.4053e-02,  5.2136e-01,  4.1419e-01,  2.4604e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9301e-02,  2.6465e+00,  2.9477e-02, -1.0845e-02, -1.4092e-02,
        -1.6209e-02, -4.1012e-03, -1.9313e-02, -1.3598e-02, -3.8745e-02,
        -7.2202e-03,  4.3905e-03,  5.4121e-03,  1.2403e-03, -4.5291e-04,
        -2.0050e-02,  1.3881e-02,  4.2821e-03,  4.5801e-03,  1.1439e-02,
         1.2564e-02, -5.3688e-03, -2.8682e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0279,  0.2638,  0.0062, -0.0043,  0.0019,  0.0060,  0.0018, -0.0010,
        -0.0011,  0.0012,  0.0066, -0.0029,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.5773e-03,  1.8605e+00, -5.3858e-02, -3.5632e-02,  6.3917e-04,
         1.0299e-03, -5.5252e-03,  1.9873e-03,  1.8514e-02, -7.8589e-03,
        -1.2723e-02, -3.9631e-03,  1.4854e-03, -5.1040e-04,  8.5102e-03,
        -8.7893e-03,  9.9876e-03, -1.1130e-03,  3.9426e-03, -1.0323e-02,
        -5.2750e-03,  1.5660e-02, -2.0037e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4923e-02,  4.6218e+00, -7.4023e-03, -6.0014e-03,  1.4322e-03,
        -3.9599e-02,  1.6589e-02, -8.3792e-03,  9.1854e-03,  3.3416e-02,
        -1.6899e-03, -3.2768e-02, -4.7556e-03,  7.1614e-03, -1.5702e-02,
        -9.8955e-03, -2.0359e-02, -2.3887e-03, -5.9944e-03,  2.4138e-02,
        -5.4058e-04, -1.2531e-02, -6.1157e-02, -1.6004e-02,  3.4789e-03,
        -1.4717e-02, -4.2277e-02,  3.7826e-03, -2.7481e-02, -3.5163e-03,
         8.5483e-03,  8.3111e-02,  4.6800e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4820e-01,  3.7951e+01, -4.2300e-01, -1.0624e-01,  1.2971e-01,
        -6.6620e-02, -1.8676e-01, -4.0823e-02,  9.0431e-02, -8.1829e-02,
         1.5111e-01, -2.4126e-01,  3.4384e-02,  1.2642e-01,  1.9065e-01,
        -9.0818e-02,  1.3639e-01,  7.5549e-02,  8.1218e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.2090e-04,  6.4742e-01, -1.0979e-02,  6.9160e-03,  3.8451e-03,
         6.6712e-03,  5.4430e-03,  1.5073e-03, -3.4818e-03, -1.2677e-03,
         8.5336e-04, -1.2159e-04,  2.2778e-03, -7.6961e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0013e+00,  1.8896e+02,  2.5821e+00, -5.9259e-01, -2.4234e+00,
        -6.9071e-01, -1.6447e-01, -1.8326e+00,  4.9427e-01,  4.3225e-01,
         2.1139e-01, -1.3136e-02,  5.2198e-01, -7.7022e-01, -5.0159e-01,
        -4.7776e-02, -4.4854e-01,  2.1596e-01, -3.8451e-02, -3.4378e-01,
        -1.6611e-01,  6.6577e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4887e+00,  7.2935e+01,  4.0514e-01, -1.6854e-01,  3.6213e-01,
        -2.8718e-01, -1.5200e-01, -1.2414e+00, -1.9378e-01, -1.1640e-01,
        -6.7131e-02, -1.4995e-01, -1.7057e-01, -8.0084e-02, -1.0491e-01,
         4.2196e-02, -3.0076e-01,  9.3296e-02,  6.2434e-02, -1.4295e-02,
        -8.8237e-02,  1.7336e-02, -2.1017e-01, -3.1969e-01, -2.8928e-01,
         1.1664e-01,  6.7215e-02, -2.5392e-01,  9.5213e-01,  1.3766e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4639e-02,  1.6765e+00, -3.9309e-02,  8.0592e-05,  1.1405e-02,
         1.7934e-02,  3.4335e-03, -9.5884e-03,  9.4861e-03,  4.4979e-03,
        -8.1250e-03,  1.9635e-03, -1.1925e-03,  2.4560e-02,  1.3639e-02,
         2.8600e-02, -3.8047e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7048, 11.7865,  0.4815, -0.0560,  0.0127,  0.0134, -0.0250,  0.1767,
         0.2108, -0.0579, -0.2297,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8961e-03, -6.5483e+01, -1.5095e+00,  1.9133e-01,  5.5999e-01,
         5.7651e-02,  2.0793e-01, -3.0190e-02,  2.4886e-01, -3.2070e-01,
         2.7432e-04, -3.9918e-01,  2.5374e-01, -1.6959e+00, -1.5644e+00,
         6.3143e-01,  1.1555e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5223e+00,  7.0703e+01, -9.6151e-01,  3.2797e+00,  5.6762e-01,
         8.9033e-02,  2.2945e-01, -7.5643e-02,  3.7155e-01,  9.7206e-02,
        -4.8510e-01,  1.1259e-01,  8.5776e-02,  4.7866e-01, -1.4997e-01,
         1.5140e-01, -1.7036e-01, -2.6247e-01, -1.0642e+00, -2.9222e-01,
        -3.3108e-01, -2.7791e-01,  3.0728e-01, -6.3196e-02, -3.0443e-01,
        -1.6935e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4675e-01,  1.0087e+01,  4.0471e-01, -6.3392e-03, -4.5921e-02,
         1.0224e-01,  2.0389e-02,  3.7609e-02, -6.9146e-02,  3.8350e-02,
         4.9852e-03, -2.7750e-02, -5.1222e-02, -2.2017e-03,  3.2998e-03,
         1.7758e-02, -1.3873e-02,  3.5603e-03, -1.2159e-02, -4.8062e-03,
         2.9687e-02,  4.3967e-02, -3.7559e-02, -9.5026e-03,  9.4268e-03,
        -1.8265e-02,  2.6105e-02, -3.0169e-02, -1.0735e-01, -3.2067e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2273e-02,  3.6569e+00,  8.1512e-03,  3.7093e-02, -4.3726e-02,
        -3.2363e-02,  6.3585e-02, -5.3174e-02, -1.2026e-02, -8.1490e-03,
         1.0000e-03, -1.8028e-02, -4.2640e-02, -1.6484e-02,  2.3178e-02,
         5.4592e-03,  2.0448e-02,  4.2191e-02, -1.5229e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 2.7214e+00,  1.4596e+02, -1.2450e+00,  1.4964e+00,  1.4487e+00,
        -9.5032e-01,  5.7365e+00,  5.8270e-01,  5.2112e-01,  2.3659e-01,
         2.0264e-01,  1.4375e-01,  3.4373e-01,  2.9579e-01, -9.1340e-01,
        -6.7249e-01,  3.3882e-01,  2.8131e+00,  2.5477e-01,  7.4221e-02,
         6.9387e-01, -4.3453e-02, -3.3890e-01, -5.6723e-01, -7.4040e-01,
        -1.0374e+00,  7.4615e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0435,  1.6065,  0.0076,  0.0091, -0.0033,  0.0134, -0.0024, -0.0082,
         0.0078,  0.0098, -0.0144,  0.0242, -0.0102, -0.0042,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3840e-01,  1.0324e+01,  6.7467e-02, -1.0891e-02,  4.7442e-03,
        -5.7547e-03,  1.2575e-01,  2.2062e-02,  5.7700e-02,  1.1627e-01,
         8.2404e-02,  4.6175e-02,  6.7488e-03,  2.6956e-02,  5.3594e-02,
         1.0813e-01,  9.6164e-02, -1.4011e-02,  9.1161e-02,  9.2140e-02,
        -2.9265e-02, -1.8082e-02, -2.0420e-02, -1.6837e-02, -4.0495e-02,
         3.7042e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6016e-03,  8.4705e-01,  9.5971e-03,  5.3488e-04, -3.9164e-03,
        -1.9138e-02,  2.9298e-03,  2.6001e-03,  1.9334e-03,  7.9249e-04,
         4.0864e-03,  7.0124e-04,  1.9039e-03, -4.7607e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3092e-02,  2.9496e+01,  9.2206e-02, -3.1501e-01, -1.7828e-01,
        -1.0717e-01,  2.8985e-03, -1.2037e-01,  2.2720e-02,  8.2066e-02,
        -2.0749e-02,  9.5912e-03,  1.1890e-01, -1.1223e-01,  7.5937e-02,
         1.4714e-01, -1.5602e-02,  1.2202e-01, -7.9940e-02,  4.1240e-02,
        -9.5204e-02, -1.2062e-03, -1.0640e-01, -1.4463e-01,  1.5453e-01,
         7.8626e-02, -2.0187e-01,  1.8321e-02, -1.7451e-02, -3.7012e-02,
         4.9473e-02,  8.0212e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4222e-01, -1.2539e+02, -3.3768e+00, -1.0034e+00,  1.0601e+00,
        -3.9132e-02,  1.4954e+00,  1.4113e+00, -1.2876e-01,  5.1238e-01,
         9.1117e-01, -2.4076e-01, -2.4118e-01,  3.7772e-02, -4.5936e-01,
        -3.1500e-01, -2.7423e-01, -9.5949e-02, -2.7506e-01, -4.0184e-01,
         9.9734e-02, -6.6978e-01,  4.4599e-01, -4.9210e-01,  2.8308e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0021e-01,  1.2646e+01, -1.2222e-02, -4.5526e-01, -4.0458e-01,
         9.4087e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6314e-01,  5.9780e+00, -4.0548e-02, -3.4294e-02,  4.7630e-02,
        -4.5410e-02,  1.7870e-02, -1.1265e-02,  8.0948e-03,  1.7881e-02,
         4.5815e-03, -1.2723e-02, -5.8486e-03,  1.8158e-02, -2.3703e-04,
        -4.5593e-03, -2.0767e-02, -1.2234e-03,  6.3245e-03,  2.5932e-02,
        -5.8454e-03,  2.9690e-02, -1.8272e-02,  4.7321e-03,  5.1247e-03,
         4.3551e-02, -2.6737e-03,  1.2974e-02, -9.9701e-03,  5.2929e-02,
         1.2421e-02, -6.1226e-03, -5.8511e-03, -7.9494e-03,  1.6007e-02,
        -1.9573e-02, -3.2931e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2789e+00, -1.0671e+02,  7.1924e-01, -3.7568e-01, -1.7053e-01,
        -4.9233e-02, -1.9503e-02,  1.2701e-01,  5.9297e-01, -3.9525e-01,
         1.5725e-01, -3.3975e-01, -2.6089e-01, -5.2088e-01, -1.1692e+00,
         4.9485e-01,  5.9273e-01, -6.6366e-01, -1.6288e-01, -5.3622e-01,
         7.7912e-01,  1.3133e-01,  2.1042e-01, -1.2782e-01, -2.4149e-01,
         6.3919e-02,  5.1323e-01, -2.9719e-01,  5.0015e-01, -5.4975e-01,
         2.8753e-01, -3.8053e-01,  6.4798e-01, -2.5133e-01,  1.0344e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6119e-01,  1.8570e+01, -1.6055e+00,  2.0611e-01,  1.4105e-01,
         4.9406e-02,  8.0880e-02, -4.9803e-03,  2.8840e-02, -1.5317e-01,
         5.7832e-02,  1.9161e-01,  2.6193e-02,  6.4459e-01,  2.6241e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2505e+00,  1.8138e+02, -1.6543e+00,  7.0414e-01,  1.9389e+00,
         5.0933e-01,  4.1134e-01, -2.1698e-02,  4.2898e-01,  6.0457e-01,
        -4.5318e-01, -5.6426e-02, -3.4840e-01, -6.6149e-01, -1.2553e-01,
        -2.5124e-01,  2.1518e-02,  9.7875e-01,  7.5315e-01,  1.1112e-01,
         1.3898e-01,  3.6649e-01,  4.9415e-01, -3.8283e-01,  5.7662e-01,
        -5.4531e-01, -2.8337e-01, -1.3913e-01, -2.1810e-01,  1.6755e-01,
        -5.0925e-01, -3.0643e-01,  3.4482e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6871e+00,  1.0471e+02, -8.7598e-01, -1.5129e-01, -4.2628e-01,
        -1.5041e+00, -1.7638e-01,  3.2511e-01,  2.0214e-01,  6.3324e-02,
         2.4069e-01,  7.3929e-01, -7.9393e-01, -1.3055e+00, -7.5282e-01,
         1.8730e-01,  1.6682e-01, -4.0791e-02, -2.3559e-01, -1.2458e-01,
        -5.0815e-01, -9.4602e-02, -8.4835e-02,  1.4454e-01,  3.0446e-01,
         2.4318e-01, -1.4912e+00,  2.9620e-01, -3.8395e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.9412e-01,  2.2782e+01,  4.2056e-01,  3.3429e-02, -1.1692e-01,
        -3.0882e-02, -1.3700e-02, -7.7532e-02,  1.2428e-01, -1.8376e-01,
         1.1272e-03,  4.6027e-03,  7.0033e-02, -9.4453e-02, -6.4823e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8562e+00, -1.4922e+02,  1.6822e+00,  1.0276e+00,  9.5659e-01,
        -1.7304e-01,  1.2343e-01,  1.5855e-02, -4.2267e-01, -5.8557e-02,
         1.9109e-01,  3.6659e-01,  3.3468e-03,  1.5442e-01,  2.3351e-01,
         2.7124e-01, -3.5502e-01,  6.9207e-02, -9.4385e-02, -1.7477e-01,
         1.1103e-01,  2.0027e-01,  7.3840e-02,  1.1974e-01,  2.3320e-01,
        -3.1840e-01,  4.6329e-01,  2.0500e-01,  5.1519e-01, -1.0290e-01,
        -1.5382e-01, -1.6806e-02, -6.7945e-02,  6.3179e-02,  1.7702e-01,
        -2.7553e-01, -8.6626e-02,  1.9456e-01, -1.2159e-01,  2.6588e-01,
         1.0680e-01, -1.2605e-02, -4.2609e-02, -7.9609e-02,  2.0036e-01,
         4.9423e-01,  4.0498e-01,  4.0796e-02, -1.6028e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8979e+00,  4.2002e+01, -9.5386e-01, -2.7755e-01,  2.1033e-01,
         2.6133e-01,  1.3633e-01, -6.4410e-02,  3.2854e-01,  2.5738e-01,
        -1.8284e-02,  8.5575e-02, -4.1366e-02,  2.3023e-02, -9.6619e-02,
        -1.6584e-01,  3.0893e-01,  7.8167e-02, -2.4114e-02,  1.4248e-02,
        -1.0157e-01,  1.1358e-01, -2.0968e-01,  1.2770e-01,  1.7466e-02,
         7.3794e-02,  7.8736e-02,  4.9961e-01,  1.8749e-01, -9.3869e-02,
         1.9840e-03, -4.7553e-02, -9.1537e-02,  6.3301e-01, -6.7299e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3775e-01, -8.3462e+01,  1.0105e+00, -5.0925e-01, -5.3284e-01,
         1.8796e-01,  2.3965e-01,  1.2616e-01,  1.2010e-01, -1.4779e-01,
        -4.7178e-02, -2.4140e-01,  7.7059e-02, -1.7473e-01, -3.9050e-01,
         4.0096e-01, -1.1836e-01,  2.2443e-01, -8.7574e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   1.8097, -101.8992,    0.4766,    0.6818,   -0.5265,   -0.6309,
          -0.4063,    0.2946,   -0.5781,    0.7953,    0.9079,    0.5331,
           0.1144,    2.9186,   -0.5369,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0585e-01,  6.7210e+00,  1.1092e-01,  5.3869e-02, -8.8878e-03,
        -4.5296e-03,  1.7838e-03,  3.8890e-02, -1.1926e-02, -2.7656e-02,
         6.7986e-02,  3.7947e-02,  2.1249e-02, -1.3799e-02, -2.0612e-02,
         1.3551e-02, -4.8223e-03,  1.5326e-02,  6.4294e-03,  3.2468e-02,
         3.3516e-01, -3.1554e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5926e+00,  6.6721e+01, -6.8514e-02, -9.6617e-02,  2.0655e-01,
        -1.8924e-01,  2.9863e-01, -9.4895e-02, -1.4691e-03,  3.4116e-01,
        -7.1936e-02,  4.6201e-01,  3.8764e-01,  4.7429e-02, -5.3771e-01,
         7.8901e-02, -6.1282e-02,  3.6087e-01,  2.7348e-01, -1.6717e-01,
         5.9682e-02,  3.7829e-01, -1.2451e-01,  5.3256e-01, -1.7758e-01,
        -7.0595e-02, -9.2865e-03,  1.7786e-01,  2.3550e-01,  3.4324e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1849,  6.1078, -0.1115, -0.1307,  0.0990, -0.0800, -0.0347,  0.0224,
        -0.0264, -0.0407, -0.0722,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8570e-01,  5.2136e+01, -6.4971e-01,  1.0639e-01, -1.0615e-01,
         1.8677e-01,  2.3756e-01, -2.7966e-01, -6.3240e-02,  1.4233e-01,
        -2.5791e-01,  8.4383e-02,  2.0331e-01,  8.4454e-02, -1.3254e-01,
         4.9432e-02, -1.5064e-01, -1.9434e-02, -2.6169e-02, -4.8487e-01,
         9.0074e-01,  1.7870e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2073e+00,  8.4017e+01, -1.4268e+00, -1.3819e-01, -2.6138e-01,
        -3.9589e-02,  2.5994e-01,  9.9919e-01, -1.5490e-01, -6.5092e-02,
        -2.2214e-01,  5.4713e-01,  7.2798e-02, -8.2347e-03, -3.1588e-02,
        -4.1127e-01, -4.3274e-02,  4.1711e-01,  9.7778e-01, -5.7810e-01,
        -1.9727e-02, -8.7398e-02, -2.6491e-01, -1.0525e-01, -2.1050e-01,
        -5.5536e-02,  2.3959e-02,  3.5332e-01, -6.3954e-01, -1.3785e-02,
         3.4909e-01,  2.1331e-02, -4.1947e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2777e-01,  6.1099e+01,  2.3139e+00,  2.6174e-01, -5.2362e-01,
        -6.4995e-02,  1.1364e-01, -7.6732e-01, -1.6928e-01, -6.6460e-01,
        -2.9690e-01,  1.0923e-03,  1.0837e-01, -1.4771e-01, -2.5572e-01,
        -3.8748e-01,  4.2983e-02, -1.2041e-01, -1.8476e-02, -1.5956e-02,
         9.2754e-02,  1.7288e-01, -4.4211e-02,  9.2522e-02, -3.4557e-02,
         1.5356e-01,  1.8959e-01,  2.4174e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3192e+00,  1.6124e+02, -6.8513e-02, -8.7151e-01, -1.0187e+00,
         5.6980e-02,  6.5604e-01, -6.1020e-02, -1.5317e+00, -1.9988e-03,
        -4.9023e-01, -3.1632e-01,  6.7548e-02,  4.0969e-01,  2.0539e-01,
        -1.0874e-01,  9.0631e-02, -1.6231e+00, -8.8634e-02,  1.8179e+00,
         2.6657e-01,  2.5869e-02, -1.2213e+00,  1.6209e-01,  1.1728e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.0437e-02,  1.2760e+01,  1.3946e-01, -1.8311e-01,  3.2377e-02,
         1.2582e-02,  1.2393e-01,  4.7976e-03,  4.1657e-03,  2.9681e-02,
        -2.3554e-02, -7.2686e-03, -6.8818e-03,  6.0506e-03,  4.5344e-02,
        -1.9726e-01, -8.3749e-02, -1.3904e-02, -3.7833e-02, -1.2293e-01,
         4.9621e-02,  1.0348e-02, -9.9494e-02, -2.2034e-02,  1.2694e-02,
        -5.9490e-02,  6.9403e-03,  7.3258e-03, -1.3385e-02,  2.3215e-02,
         1.6131e-02, -1.3353e-02, -5.3765e-02, -2.7617e-03,  2.7438e-02,
         2.8949e-02, -1.5122e-02,  5.1224e-02,  5.8806e-02,  2.0465e-01,
        -2.3725e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0536e-01,  7.4396e+01,  8.4249e-01, -3.1438e-01,  2.3402e-01,
        -4.2657e-01, -9.5409e-02,  2.9298e-01, -2.1696e-02, -2.6965e-01,
        -7.8802e-02, -7.4335e-01,  4.9348e-01,  5.8017e-02,  3.9263e-01,
        -3.7222e-01,  9.7431e-02,  1.8793e-02,  3.7460e-02, -2.4964e-01,
        -9.1110e-02, -8.5862e-01,  5.5380e-02,  9.6607e-02,  7.5052e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9252e-02,  5.0509e+00,  1.1526e-01, -1.0318e-01,  1.7517e-02,
         3.1249e-02,  3.8786e-02,  4.8111e-04,  1.1595e-02,  4.5799e-02,
         3.8471e-02,  2.5561e-02,  1.4699e-04,  1.7249e-02,  1.4352e-02,
        -2.5124e-03, -2.8509e-02,  8.3511e-03, -1.9535e-02,  1.6177e-03,
        -2.2381e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3071e-01,  3.5158e+00, -3.4186e-02, -2.9502e-02,  1.9861e-03,
        -5.7979e-03,  1.2497e-02, -2.9202e-02, -1.0275e-02,  4.8267e-02,
         4.0200e-03, -6.6532e-02, -2.2319e-03, -2.2386e-02, -3.6267e-02,
        -4.4083e-03, -3.6175e-02, -4.8422e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1409e-02,  3.0574e+00,  4.5925e-02, -1.7861e-03,  5.3774e-04,
         1.7957e-02,  1.3182e-02, -2.2093e-02, -1.9164e-02, -1.1481e-02,
         1.2079e-02, -1.2356e-02, -1.2022e-02,  7.6322e-03,  5.2521e-03,
         3.0648e-02, -2.0344e-02,  7.6310e-03,  1.0210e-02, -9.4131e-04,
        -1.2529e-03,  3.1692e-03,  3.1096e-03,  2.5334e-03, -6.3979e-03,
         1.5628e-02,  6.4720e-03, -9.2703e-03, -1.5387e-02,  1.6207e-03,
         4.3410e-03, -9.1679e-04,  6.2701e-03,  2.1516e-03,  2.5220e-02,
         1.5731e-02, -3.3983e-03,  6.7653e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4669e+00,  1.5948e+02, -6.1430e+00, -8.9161e-01, -1.2266e-01,
         2.5185e-01, -1.8200e+00, -2.0783e+00, -2.3565e-01, -3.8147e-01,
        -6.2029e-01, -7.1859e-01, -7.4773e-01, -2.4785e+00, -8.2358e-01,
        -5.1494e-01,  1.3422e-01, -6.0954e+00, -1.2849e+00,  2.1479e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3793, 75.1444, -0.6128,  2.0974,  0.2960, -0.3795,  0.4212,  0.6296,
         0.5347,  0.3335,  0.9032, -0.3385, -0.6025,  0.9450,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8574e-01,  2.7075e+00, -4.5597e-02, -1.4936e-02,  1.3227e-03,
         9.8227e-04, -1.0236e-02, -1.0671e-02, -1.2917e-02,  1.4307e-02,
        -1.9192e-02,  4.0070e-03, -4.8502e-03, -5.0502e-03, -5.8953e-03,
        -2.0438e-02,  5.2670e-03,  2.9336e-03,  1.8099e-02,  1.8105e-02,
        -6.4443e-03,  4.9782e-02,  6.8504e-03,  1.9248e-02, -1.8637e-03,
        -2.0700e-02,  3.5554e-03,  3.2401e-03, -1.0531e-02, -3.7967e-02,
         6.3918e-03, -1.5401e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0695e+00, -8.1277e+01,  6.0039e-01, -3.2888e-02,  1.7150e+00,
        -2.2246e-01, -7.9743e-02,  1.6415e+00,  1.1116e-01,  3.7479e-01,
         7.8631e-01,  8.7185e-01,  3.1273e-01,  4.0047e-01, -4.7813e-01,
         6.9963e-02, -1.0064e-01, -4.9642e-01, -1.4841e-02,  2.6029e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6585e-01,  1.8720e+01,  4.4955e-01, -1.6666e-01,  8.4633e-02,
         1.7043e-01, -1.2875e-02, -7.5826e-02,  3.1540e-02,  5.0375e-02,
        -8.8651e-02, -1.8073e-02, -6.3809e-02, -5.3788e-02, -2.6900e-02,
        -6.8448e-02, -7.7190e-02, -4.8701e-02, -6.5344e-02, -4.2098e-03,
         2.8806e-02, -1.7321e-01, -1.0874e-01,  9.9441e-02,  9.9550e-03,
        -6.1476e-02, -6.4269e-02,  1.5433e-02, -2.4495e-02,  5.1229e-03,
        -4.1202e-02,  1.5412e-02,  3.0285e-02,  1.0573e-01,  3.6609e-02,
         4.1730e-02,  7.5231e-03, -1.1470e-01,  1.5658e-02,  8.4040e-03,
         7.3298e-02, -2.2627e-02, -3.9768e-02, -1.0145e-02,  6.5013e-02,
        -1.3047e-02,  4.9307e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9404e-01,  1.1522e+02, -1.3999e+00, -1.5529e+00,  4.5765e+00,
        -2.0844e-01, -5.3284e-01, -2.4746e-02, -8.9702e-01, -4.8412e-01,
        -1.2203e+00, -4.2211e-01,  5.4889e-01,  1.8220e+00,  2.5505e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4154e-02,  3.9614e+00,  2.4771e-02,  2.9257e-02,  1.8108e-03,
        -9.8915e-04,  8.1645e-03, -1.9114e-02,  1.0241e-02, -8.2459e-03,
        -1.3855e-02, -5.5484e-03, -1.4089e-02,  1.1153e-02,  3.1430e-03,
         1.9966e-03, -8.0363e-03, -7.5625e-03,  6.9882e-03, -2.3305e-03,
        -9.3515e-04, -1.3632e-02, -6.7902e-04,  1.3255e-02,  5.7546e-03,
        -4.5128e-03,  1.5608e-02,  7.7654e-03, -3.0178e-03,  7.9405e-03,
        -2.6647e-03, -4.8214e-03, -3.9927e-03,  5.0403e-03,  1.1692e-02,
         1.7578e-02, -4.1769e-04,  2.3458e-03, -1.9495e-02,  6.9568e-03,
         8.3469e-03,  3.6961e-03, -5.3951e-03, -3.1280e-03, -1.9665e-03,
        -1.2240e-03, -5.9993e-04,  9.7552e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-3.7321e-02,  1.1619e+01,  1.4122e-02, -2.4049e-02,  1.2187e-01,
        -1.5176e-01,  2.1643e-02, -1.1042e-01, -1.0449e-01, -1.4517e-01,
        -1.3257e-02, -1.7303e-01, -5.9052e-02,  4.5180e-03, -2.8699e-02,
        -5.5890e-02, -1.3009e-01, -9.9492e-02, -1.7690e-02, -1.3143e-02,
         7.7812e-03, -3.5865e-02, -1.6322e-03, -5.6998e-02,  1.3923e-01,
         1.2195e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2750e+00,  1.0382e+02,  1.3575e+00,  4.9112e-02, -2.0239e-01,
        -9.7273e-02, -1.2285e+00, -1.4707e-02,  1.8483e-01,  1.0444e-01,
         2.8395e-01, -6.0476e-01, -1.6176e-01, -9.9725e-03, -1.3238e-01,
         6.6044e-02, -1.6135e-01, -7.6003e-01,  3.7566e-02,  1.6018e-01,
         3.5058e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7493e-01,  3.2884e+01, -2.8802e-01, -3.2723e-01, -4.8388e-01,
        -1.7595e-01, -8.2570e-02, -1.1713e-01, -8.3118e-02,  1.7746e-02,
        -3.3077e-02, -1.4630e-02, -9.8559e-02, -9.1347e-02,  2.1669e-01,
        -2.3292e-02, -4.6056e-02, -4.7953e-02,  2.5387e-02, -1.7262e-01,
        -2.9010e-02, -2.0640e-01, -2.9145e-01,  1.0358e-01,  2.3564e-01,
        -3.2722e-01, -8.8385e-02, -1.3113e-01, -1.3740e-02,  1.1310e-02,
        -1.8354e-01, -6.8405e-02,  2.1940e-01, -6.1966e-02, -1.7625e-01,
        -2.1744e-01,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6486, 47.3475, -0.8944, -0.0802, -0.2519,  0.0694, -0.0620,  0.1537,
        -0.3246,  0.4003,  0.3958,  0.1285, -0.1566, -0.5502, -0.3618,  0.2703,
         0.2244, -0.4074, -0.4402,  1.0578,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2064e+00,  1.8970e+02, -1.4188e-02,  1.2740e+00, -1.7888e+00,
        -8.3105e-01, -4.2490e-01, -1.1795e+00,  4.4420e+00,  4.8377e-01,
        -1.7808e-01, -8.5497e-02, -9.9644e-01,  3.3819e-01,  3.4968e+00,
        -1.6272e-01, -2.4822e-01, -1.0280e+00, -1.0684e-01, -5.3586e-01,
        -2.4170e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4515e-01, -2.0475e+01, -1.8020e-01,  6.2598e-03, -2.1771e-02,
        -4.4713e-02, -4.0649e-03,  4.3442e-02,  1.1974e-01, -2.6868e-02,
        -1.6528e-01, -6.0409e-02,  9.3189e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7605e-04,  8.6819e+00,  1.5889e-01,  1.0422e-02, -1.6928e-02,
        -3.6802e-03,  4.2268e-02, -5.8629e-03,  4.4579e-02, -2.2623e-02,
        -2.0768e-02, -5.2265e-02, -1.1474e-02, -2.7108e-02,  1.5540e-03,
         4.2429e-03, -4.1725e-03, -1.2717e-02, -1.4716e-02, -1.6843e-02,
         8.1959e-03, -5.7086e-03, -9.6783e-03, -9.1724e-04,  1.4926e-02,
        -1.8537e-02, -4.9934e-02,  1.5072e-03,  8.4601e-03,  5.0829e-03,
         1.4338e-02,  1.6650e-02,  3.6192e-03,  1.2664e-02,  1.9755e-02,
         2.4874e-02,  3.1374e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6954e-01,  5.7974e+00,  1.3378e-01,  2.8255e-02, -9.0190e-03,
        -3.6325e-02, -4.9289e-02,  8.5691e-03, -9.7475e-03, -1.2781e-03,
        -8.7953e-03,  4.1305e-02, -1.9447e-03,  3.1289e-02, -1.1479e-02,
         2.1288e-02, -2.9223e-02,  6.9878e-04, -2.1468e-03,  1.7537e-02,
         1.9690e-02,  5.6400e-03, -7.4862e-03, -2.1308e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1511e-01,  4.7646e+00,  1.0448e-01,  4.4477e-02,  7.8004e-03,
         5.9809e-03,  8.0518e-03,  2.3462e-02, -1.5895e-03, -1.5602e-02,
        -2.3067e-02,  9.3035e-02,  3.0037e-02, -8.6865e-03,  2.5896e-02,
        -9.4978e-03,  1.5456e-02,  1.0689e-02,  1.1050e-03,  9.5413e-03,
        -5.8489e-03,  1.7852e-02, -1.4924e-03, -7.6428e-04,  3.0792e-03,
         6.8382e-03, -1.3369e-02,  3.4643e-05, -1.0210e-02, -1.8944e-02,
        -1.5315e-02, -5.9889e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3845e+00,  8.1200e+01,  1.0030e+00, -3.0005e-01,  9.6939e-01,
        -1.0647e+00,  5.6642e-02,  4.2379e-01,  5.3485e-01,  8.0378e-01,
        -6.2336e-02, -1.8230e-01,  1.1077e-02, -2.6202e-01, -8.1750e-02,
         2.1693e-01, -1.0636e-01,  4.6209e-02,  3.5656e-01,  4.1447e-01,
        -1.9367e-01, -1.7415e-01,  1.8825e-01,  5.0576e-01,  1.1910e-01,
         6.2595e-01,  7.9526e-01, -1.1776e-01,  3.7714e-01,  2.8014e-01,
        -2.4614e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3747e+00, -1.4067e+02, -4.3775e+00, -6.7370e-01, -7.6829e-01,
        -1.8831e-01, -3.4449e-01,  3.1052e+00,  3.2857e-01,  4.1821e-01,
        -7.2807e-01, -5.9968e-01,  9.7694e-01,  1.9286e+00, -5.4809e-01,
         6.3536e-01, -4.2724e-01, -1.0453e+00, -3.2005e-02,  6.7323e-01,
        -1.2368e-01,  5.0655e-01,  9.6612e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6971e-01,  1.0549e+01, -1.1415e-01, -1.0497e-01, -1.1046e-01,
        -6.6013e-02, -4.5672e-03, -9.0083e-02,  3.0311e-02,  3.8839e-02,
        -3.6872e-03, -1.1722e-01, -3.6295e-02, -5.0635e-02,  9.8746e-02,
         2.5373e-02,  5.0074e-03, -3.6901e-02,  2.8405e-02,  1.6042e-02,
        -2.2315e-02,  3.3095e-02,  1.0909e-01, -5.9852e-03, -4.2994e-02,
        -3.2106e-02,  2.0910e-01,  1.1209e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-4.5975e-01,  1.8412e+01,  2.9007e-01, -7.6868e-02, -1.1299e-01,
        -1.9343e-02,  7.4443e-02, -9.5171e-02, -7.3980e-02,  2.7159e-02,
         1.0984e-01,  1.4944e-02, -2.3350e-02,  3.1471e-02,  2.8204e-02,
         7.7939e-02, -1.5281e-02,  1.5445e-01, -4.2984e-02, -2.6614e-02,
         3.6053e-02,  1.3806e-02, -3.2590e-02,  6.2744e-02,  7.0113e-02,
         1.7458e-02,  1.0768e-01,  7.9255e-02, -1.0592e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7900e-01,  3.8822e+01,  9.6531e-02, -2.6698e-01,  2.1994e-01,
         5.4840e-01,  2.9458e-01,  3.3934e-03, -1.1708e-02,  1.8991e-01,
         7.8420e-02,  5.8684e-02, -3.1216e-01,  2.1909e-02,  7.9078e-02,
         4.6564e-02,  1.8090e-01, -2.8492e-01,  3.8143e-02,  1.6237e-01,
         5.5321e-02,  3.9090e-01, -5.3006e-01, -3.5869e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3041e-01, -3.3335e+01,  3.8304e-01, -5.4474e-01, -1.2668e+00,
        -2.0717e-01, -6.0039e-02, -2.1005e-01, -1.5228e-01, -1.9739e-01,
        -1.6143e-01, -3.3728e-01, -5.6645e-01,  3.3889e-02, -5.3629e-02,
        -3.6119e-02, -2.1712e-02, -4.0752e-02, -3.6421e-01, -7.5213e-02,
        -3.8887e-02,  6.4223e-02, -3.7099e-02, -8.0531e-02,  1.3049e-02,
         6.7839e-02,  2.9418e-01, -1.1023e-05, -4.0994e-02, -5.1713e-02,
        -5.0473e-02, -2.0243e-01,  3.1978e-01, -2.0059e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6489e+00, -2.3596e+02,  2.1332e+00,  4.0759e-01, -2.8165e-01,
         8.9985e-01,  1.4525e-01,  3.7710e-01,  3.2726e-01,  9.8551e-01,
        -7.4440e-01, -4.6418e-02,  7.2119e-02,  7.2940e-01,  4.4255e-01,
        -1.6740e+00, -3.8096e-01, -2.4147e-01,  2.9020e-03, -1.9859e+00,
         1.2034e-01,  2.0029e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5598e+00,  5.0006e+01,  9.1291e-01,  6.7252e-02,  4.2053e-01,
        -2.3297e-01,  3.9133e-02,  2.2870e-02, -3.3148e-02,  4.4888e-01,
         4.0871e-02, -5.0098e-02, -4.3603e-02,  1.0353e-01,  3.0398e-01,
        -1.4357e-01,  4.5318e-01,  1.3433e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0416e+00,  7.2976e+01, -1.0958e+00, -3.0176e-01,  2.5475e-01,
        -7.3219e-02,  1.2501e-01,  1.0643e-01,  2.3747e-02,  4.0206e-01,
        -1.3354e-01, -2.8017e-01, -6.5253e-01,  4.3329e-01,  7.5715e-02,
         3.6621e-01,  3.6499e-01, -9.9427e-01, -5.0171e-01, -3.3286e-01,
        -4.5123e-02,  3.4334e-01,  8.9661e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7394e-01, -8.7501e+01,  2.7523e-02,  4.9835e-01,  1.1928e+00,
         1.5588e-01, -1.8288e-01,  5.7890e-01,  5.8189e-02,  2.0078e-01,
         3.3731e-01, -9.5563e-02,  7.4233e-02, -1.9727e-02,  5.0604e-01,
         1.0612e+00,  2.1596e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5398e-01,  2.2391e+01,  9.6274e-01,  1.1171e-01,  5.4338e-02,
         7.5573e-02, -1.5152e-01, -4.9728e-02, -1.7903e-02, -8.6500e-02,
         1.1686e-01, -4.8907e-02, -9.5901e-02,  2.6700e-02,  8.7650e-02,
         1.6077e-01,  6.2582e-02, -4.3291e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6518e+00,  8.5652e+01, -1.7454e-01, -7.9406e-01,  1.5890e-02,
         3.5038e-01, -4.2954e-01, -1.3102e-01, -4.0561e-01,  1.4819e+00,
         7.8085e-02,  1.0882e-01,  6.7567e-01, -1.3315e-01, -2.3406e-01,
        -2.0178e-01,  6.0310e-01, -3.7552e-01,  4.4045e-01, -4.8001e-01,
        -3.2021e-01, -6.1550e-02, -2.5181e-01, -9.6701e-03,  2.0435e-01,
        -1.2377e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7465e-02,  1.2063e+00,  4.4211e-02, -5.5226e-03,  1.6160e-03,
        -2.4481e-04, -6.3875e-03,  1.7383e-02,  2.0338e-03, -1.7316e-02,
        -8.1307e-03, -9.3121e-03, -6.4480e-03, -3.4266e-03, -6.0885e-03,
         1.0396e-02,  7.5844e-04,  2.4223e-03,  2.3322e-03, -2.7148e-03,
        -3.2834e-03,  1.1063e-03, -3.2737e-03, -1.8769e-03, -3.0789e-03,
        -3.0569e-03, -2.1600e-03,  1.5719e-03, -3.7421e-03,  3.1720e-03,
        -2.4183e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3055e+00, -1.3182e+02, -1.2500e-01, -5.6674e-01,  1.0976e+00,
        -2.8327e-01,  2.0155e-01, -1.2292e+00, -2.3621e-01, -1.7262e+00,
        -1.8965e-01,  3.0236e-01, -9.1456e-01,  7.1778e-01, -2.1876e-01,
        -3.3390e-02, -2.4258e+00, -3.8045e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3070e-02,  1.3018e+00, -3.2428e-02,  5.4264e-03, -3.0225e-03,
        -2.0333e-02, -4.1942e-04,  1.7022e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.5829e-01,  4.6633e+01, -3.2290e-01, -4.0648e-01, -4.7266e-01,
        -7.3949e-02, -3.0143e-01, -1.2748e-01,  1.8699e-02, -9.0893e-02,
         7.2370e-02, -2.4221e-01,  5.4729e-02, -3.7639e-01, -1.7872e-02,
         1.7131e-01,  2.0443e-01, -2.3422e-01, -2.8895e-02,  2.1414e-01,
         3.2236e-01,  3.0864e-01, -1.4341e-01, -2.4452e-02, -3.5732e-02,
        -3.6527e-02, -5.6930e-02, -3.8835e-02,  2.0609e-01, -5.0609e-02,
         3.2032e-02, -2.7125e-02,  1.1894e-02, -8.5693e-02,  9.1137e-02,
         1.1456e-02,  6.6764e-02,  3.4819e-01, -1.6374e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7980e-02,  8.6887e+00, -2.0300e-01,  5.5163e-02, -3.8536e-02,
        -2.5293e-02, -1.8085e-02, -2.9070e-02,  3.4327e-02,  1.4010e-03,
        -7.6960e-03,  3.9038e-02,  3.1102e-02, -2.9127e-02,  2.5594e-02,
         4.8398e-03,  1.9366e-02, -1.5722e-02,  4.8743e-03,  2.0368e-02,
        -3.5422e-02, -4.4656e-02,  3.0868e-02, -3.8754e-03,  1.4992e-02,
         1.2515e-02,  6.1197e-02, -7.5926e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6573e-01, -7.8333e+00,  1.1348e-01, -1.8943e-01, -7.7419e-03,
        -2.3456e-02,  9.2925e-02,  4.7671e-02, -1.0866e-01, -1.1551e-02,
         3.8447e-03,  4.2637e-02,  5.9194e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9486e-01, -6.0609e+01, -1.0169e-02,  1.5541e-01,  6.0959e-01,
         3.9603e-01, -3.6322e-01,  2.2007e-01,  1.1232e-01, -7.5071e-02,
         7.2841e-02,  6.1918e-01, -3.4349e-02,  5.1318e-01,  4.8479e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6549e-02, -4.6900e+00, -6.3507e-02,  1.1914e-02, -7.4617e-03,
         1.8083e-02, -2.8757e-02, -1.3676e-02, -1.0723e-02,  1.5482e-02,
        -2.2104e-02, -1.1255e-02,  4.1682e-02,  9.3632e-03,  1.2507e-02,
         1.8809e-02, -4.9783e-05, -5.4159e-03,  1.1938e-02,  1.3663e-03,
        -4.5040e-03, -1.3671e-03,  1.7564e-04,  2.0267e-02, -5.1954e-03,
        -6.3685e-03,  2.9597e-03, -1.6972e-02, -7.4887e-03,  5.9072e-03,
         1.7557e-02, -4.4750e-03,  1.1280e-02,  1.8770e-03, -2.6933e-02,
        -2.2415e-03, -1.2812e-03, -3.4207e-03,  1.0526e-02,  7.4186e-03,
         2.1107e-03,  9.7278e-03, -6.8457e-03, -2.5272e-03, -7.8577e-03,
         7.7096e-03,  4.9700e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8336e-01, -8.4804e+01,  4.7628e-01, -1.2810e+00,  5.6984e-01,
        -2.5749e-02, -3.1164e-01,  4.6203e-01,  6.4585e-01, -1.2060e-01,
         4.1433e-01,  6.5915e-02, -2.8994e-02,  5.8877e-02,  5.8336e-02,
         1.9960e-02, -3.1308e-01, -2.2923e-01, -8.6841e-02, -9.2143e-03,
        -1.1126e-01, -4.4852e-02, -2.2226e-01, -5.3731e-02, -1.0457e-01,
        -1.4522e-01, -9.6309e-01,  5.1524e-02,  8.9058e-03,  1.3502e-01,
        -8.6834e-02,  6.9252e-02, -1.3994e-01, -1.9474e-01, -1.8998e-01,
         1.5075e-02,  2.9351e-02,  3.3620e-02, -2.6334e-02,  1.0496e-02,
         1.6156e-02,  7.0815e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9570e-01,  3.7440e+01, -1.5169e-01, -9.0378e-01, -3.4867e-02,
         1.6466e-01,  6.7506e-01, -8.0601e-02,  2.9766e-02, -6.2336e-02,
         1.8363e-03,  3.4458e-01, -2.2522e-01,  2.7727e-01,  2.5083e-01,
        -2.8761e-01,  3.8474e-01, -1.1135e-02, -1.6020e-01,  1.7086e-01,
         4.0108e-02, -7.9143e-02, -3.1093e-01, -1.3226e-01, -3.9398e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8915e-01,  1.4708e+01,  2.4888e-01, -7.5072e-02,  4.8546e-02,
        -3.2999e-02,  1.1383e-01, -3.0615e-02,  8.0574e-02, -1.2072e-02,
         4.9866e-02,  3.6196e-02,  7.7226e-02, -5.9806e-02,  1.7290e-02,
         3.4054e-02,  6.6191e-02, -1.1384e-02,  3.2239e-02,  8.6395e-02,
         9.3098e-02,  1.6995e-02, -1.3779e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9392e-01,  1.4287e+02, -4.2120e-01, -4.1214e-01,  5.5670e-02,
        -1.6869e-01, -2.4266e-01, -3.9077e-01,  1.3127e-01,  1.9850e+00,
         3.2742e-01,  2.6030e-01,  1.9381e+00,  1.0256e+00, -9.5103e-01,
        -3.9807e-01,  7.4482e-01,  4.0572e-01, -1.7527e-01, -2.1287e-01,
         4.6679e-01, -1.8441e+00,  9.2246e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3522e+01,  1.3245e+02,  2.7766e-03, -2.6158e+00,  5.7012e-02,
        -1.5451e-01,  2.6776e+00, -1.1347e+00,  1.1063e+00,  3.4195e-02,
        -4.3240e-01,  1.4322e-01,  1.2704e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0140e+00,  2.3128e+02,  9.7239e-01, -1.8480e+00, -1.9247e-02,
         2.5931e+00, -5.5342e-01,  9.3396e-01, -3.7171e-01, -2.4216e+00,
         5.0180e-01, -1.8985e+00,  2.8881e+00, -1.0949e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3264e+00, -1.0363e+02, -1.4189e+00,  1.1003e-02, -3.7714e-01,
         6.7994e-02, -9.8290e-02, -1.7360e-01,  4.5029e-02,  2.9649e-01,
         4.7265e-01, -3.2287e-01, -4.4591e-01, -3.3791e-01, -3.8421e-02,
        -1.8062e-01,  7.5275e-01, -2.5888e-01, -2.0428e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 7.7833e-01,  1.4809e+02,  1.4268e+00, -1.4073e+00, -2.1197e-01,
         8.7515e-01,  6.9513e-01,  2.3016e-01, -1.0371e-02,  8.1846e-01,
         1.3893e-02,  5.6505e-02,  8.1012e-02,  1.0303e+00,  5.5431e-01,
        -1.7988e+00,  9.3547e-01,  3.7221e-01,  9.4213e-01,  1.1021e-01,
         1.4141e-01, -3.5597e-02, -4.7750e-02,  4.7278e-01, -2.2576e-02,
        -7.4721e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9111e-03,  1.8997e+00, -1.4745e-02, -2.7493e-02,  5.2263e-03,
        -1.5774e-03, -6.5062e-03, -4.0350e-02, -3.6945e-03,  8.1781e-04,
        -6.4188e-04, -2.1268e-03,  2.1560e-05,  1.2087e-02, -1.9883e-02,
        -1.3715e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0323e-02,  1.7942e+00,  8.4457e-03, -4.4524e-03, -2.9646e-03,
        -1.8983e-02, -6.1706e-03, -2.4417e-02,  2.9577e-02, -6.6365e-03,
         1.0227e-02, -1.7026e-02, -8.5176e-04,  1.9758e-02, -2.0024e-02,
        -3.1957e-02,  6.2556e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6050e+00, -3.9253e+01, -6.2633e-01, -2.2233e-02, -1.1827e+00,
        -1.8488e-01,  1.0688e-01, -1.4172e-01, -3.5565e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6757e+00, -7.2028e+01,  8.9117e-01,  8.1808e-01,  1.3653e+00,
        -3.9130e-01, -3.2895e-01, -2.9287e-01,  2.2279e-03,  1.9975e-01,
        -3.5741e-01, -9.4249e-02,  1.6793e-03,  1.4474e-01, -2.5736e-01,
         2.3860e-01, -3.9457e-01, -6.4476e-01,  8.8232e-02, -1.0916e-01,
        -1.9478e-01, -3.7635e-01, -2.3585e-01,  2.0192e-01, -3.9288e-01,
        -1.9519e-01,  5.6506e-01, -1.5035e-02,  3.5285e-01,  1.2378e-01,
        -1.7889e-01,  8.6692e-02, -2.6443e-01,  1.2250e-01,  5.3501e-02,
         4.8515e-01,  5.9587e-01, -5.3107e-01,  4.7341e-01,  2.3661e-01,
         1.8239e-01,  7.2070e-01,  5.8359e-01, -2.7674e-01, -2.1551e-01,
         1.1237e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7538e-02,  1.2827e+00,  2.7828e-02, -4.3582e-03,  1.1749e-02,
         1.0714e-03, -1.4702e-03,  4.7044e-03, -4.8089e-04,  5.2989e-03,
        -1.0227e-03, -2.6794e-03, -2.2553e-02, -1.5080e-04,  1.8138e-03,
        -7.5264e-03,  1.2314e-03, -6.6448e-03, -3.6018e-03,  1.8495e-03,
         1.9891e-03, -2.2089e-03, -2.4445e-03, -5.0266e-04,  1.0027e-03,
        -3.0850e-03, -7.1647e-03, -1.1250e-03,  2.7277e-03, -1.7091e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1550, 71.7028,  1.1194,  0.4210,  0.8344, -1.4908, -0.6974,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  2.1078, 176.0239,  -0.5060,  -0.4994,   1.7867,  -0.3009,   0.7206,
          0.6269,  -0.2658,  -1.3198,  -0.8932,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2622, 31.9100,  0.2596,  0.0431, -0.0631,  0.2624, -0.1005,  0.2058,
         0.1989,  0.1174, -0.1252,  0.0652,  0.1109, -0.7983,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3029e+00, -1.6361e+02, -3.9669e-01,  9.9037e-01, -4.5677e-01,
        -3.7203e-01, -2.1237e-01,  4.8963e-01,  4.8827e-01, -2.4424e-01,
        -1.8932e-01, -2.2306e-01,  3.9964e-01,  6.8242e-01,  1.7531e-01,
        -3.2134e-01, -3.1356e-02, -2.2220e-01, -5.8840e-01, -2.9635e+00,
        -5.3565e-01, -1.3817e+00,  4.7486e-01,  2.2899e-01,  4.7167e-01,
        -3.0379e-01, -9.7901e-01, -1.3036e-01, -1.1709e-01,  9.2602e-01,
         3.0950e-01,  9.0737e-01, -1.2879e+00,  7.6562e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1432e+00, -1.5458e+02, -1.6919e+00, -2.4381e+00, -3.8514e-01,
         5.4758e-01, -9.4084e-01,  9.2440e-01, -2.2071e-01, -2.4564e-01,
        -5.4783e-02,  4.1201e-01, -1.9438e-01, -1.0385e-01,  2.2141e+00,
         8.0610e-01, -7.7680e-01, -4.8727e-01,  5.3137e-01,  4.3929e-01,
         1.3135e-02, -5.1145e-01,  5.1628e-02,  3.8197e-01,  2.5990e-01,
         2.5537e-01, -1.2711e+00,  7.0343e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0166e+00,  4.7551e+01,  3.7407e-01, -7.4391e-02,  5.2421e-01,
        -1.7862e-02, -1.6617e-01,  7.4822e-02,  8.4413e-02,  1.1846e-01,
         3.0113e-02,  2.6924e-02,  2.4086e-01, -1.1236e-03, -1.4836e-01,
         2.3075e-02,  2.7243e-01, -1.4380e-01, -1.2182e-01,  1.0906e-01,
         2.0139e-02, -1.4964e-01, -1.3994e-01,  2.9320e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 7.1725e+00,  1.7238e+02,  6.7800e+00,  4.6500e+00,  3.3886e-01,
        -8.9314e-01,  1.2018e-01, -4.6996e-01,  4.5740e-01, -2.2498e-01,
        -1.7957e+00,  1.4114e-01,  5.8605e-01,  4.9100e-01,  7.8758e-01,
        -3.7074e-01, -5.6159e-01,  2.2848e-01, -4.0700e-01,  8.8560e-01,
         4.1857e-01,  7.5566e-01,  4.7852e-01,  8.0405e-02,  4.8107e-01,
         8.7059e-01,  3.1161e-01,  2.0738e-01, -1.6773e-01,  1.3066e-01,
         9.7576e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  -5.5428, -129.5475,   -2.6296,   -2.7090,    0.5601,   -1.1619,
           0.1469,    0.8173,   -3.3915,   -0.3389,    1.4228,   -1.2374,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0853, 63.5307, -0.6985,  0.4822,  0.6353, -0.2964, -0.6323, -0.3239,
         0.1328, -0.1333, -0.3597, -0.6820,  1.2172, -0.2509,  0.3804,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8026e-01,  7.7914e+01,  2.2548e+00,  6.3970e-01,  4.0187e-02,
         4.5378e-01, -6.0055e-02,  1.9226e-01, -7.3786e-03,  1.6458e-01,
         1.4669e-01, -5.6896e-02,  8.2598e-02, -6.7076e-02,  2.0225e-01,
        -1.3018e-01,  8.9237e-02, -1.4812e-01, -1.6237e-02, -2.8059e-01,
         5.4012e-02, -1.9483e-02,  9.0036e-02,  1.3473e-01,  1.1158e-01,
         7.1493e-02,  6.9982e-02, -3.0833e-01, -1.4355e-02, -5.7866e-02,
         7.3909e-02,  1.7571e-01,  3.3012e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6357e+00,  6.9512e+01, -1.1184e-01, -4.0785e-01, -9.8769e-02,
        -4.3271e-02, -2.8515e-01, -1.7863e-02,  1.0448e-01, -3.1000e-01,
        -1.5347e-01, -7.1899e-02, -5.2061e-01,  4.4690e-01, -1.4603e-01,
         1.0774e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1985,  7.4368,  0.0093, -0.0449,  0.0230,  0.0088, -0.0147,  0.0104,
        -0.0091,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4235e-02, -1.2219e+02,  7.5092e+00,  2.9670e+00,  2.0245e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0968e+00, -1.6576e+02, -1.1377e+00,  1.4452e-03,  2.4201e-01,
         3.5687e-01,  1.0044e+00,  5.5498e-02,  1.1678e+00,  5.7502e-01,
         5.3656e-01,  7.8813e-01,  1.0169e+00,  2.1136e-01,  2.9203e-01,
         2.9471e-01,  5.4090e-01,  1.0503e-01, -4.1286e-01,  1.0027e-02,
        -1.0971e+00,  1.2256e-01,  9.8130e-03,  3.0703e-01,  1.7770e-01,
         3.5907e-02,  1.4832e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   6.8808, -141.6026,   -3.0445,    0.3524,   -2.9125,   -0.6149,
          -1.8515,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1743e+00,  2.3443e+02,  5.7048e+00,  8.8215e-01,  1.2919e-01,
        -6.3918e-01, -2.2532e+00, -1.3944e+00, -1.5080e+00, -8.4000e-01,
        -2.2421e-01,  3.0077e-01, -3.4337e-01,  1.1113e+00,  2.1428e-01,
         2.0463e-01,  5.0609e-01,  1.2634e-01, -5.1629e-01, -4.4711e+00,
        -1.0651e+00,  3.7020e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2985e+00, -1.5211e+02, -6.0911e-01,  1.1634e+00, -9.2807e-01,
        -9.4957e-01, -2.1381e-01, -1.0172e+00,  1.7311e-01,  8.8040e-01,
         8.6343e-01,  2.2787e-01, -2.1212e-01, -1.4541e+00,  3.5187e-01,
         6.9095e-01,  5.4904e-02, -1.7148e-01, -7.4022e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0096e+00, -9.8640e+01,  7.6917e-01,  2.1449e-01,  6.5679e-01,
        -4.0257e-01, -3.8308e-01,  2.8560e-01, -1.3729e+00,  6.2257e-01,
         8.9090e-02, -2.0916e-01,  1.7138e-01, -3.0399e-01,  4.0193e-01,
         2.9116e-01, -9.6239e-03, -2.2579e-02, -8.2063e-02,  4.0320e-01,
        -3.2226e-01, -5.9038e-01, -1.1352e-01, -1.8789e-01, -2.4606e-01,
         6.2716e-02,  2.3569e-01,  1.0508e-01, -1.3497e-01,  8.3049e-01,
        -3.7965e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 2.2414e+00, -4.1802e+01,  5.5827e-01,  4.9107e-01, -1.2726e-02,
        -8.9344e-03,  8.6485e-02,  1.3311e-01,  9.6943e-02, -7.2638e-02,
         3.3161e-02, -3.9222e-02, -1.9073e-01, -4.9946e-01, -1.3156e-01,
         1.5119e-01, -1.2696e+00, -3.2407e-02,  4.4366e-01, -1.2631e-01,
         8.5913e-02,  1.1886e-01, -1.2448e-01, -3.0733e-02, -2.1492e-01,
         3.0130e-02,  1.2825e-01, -7.2751e-02,  1.1725e-01, -2.2252e-01,
        -2.8316e-01, -2.8863e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1676e+00, -2.2637e+02,  1.6002e+00,  5.6185e-01,  3.0965e-01,
         8.7024e-01,  3.1901e-01,  9.9634e-01, -3.4917e+00,  6.6357e-01,
         7.1847e-01, -2.0885e-01, -1.6514e-01,  4.6054e-01, -4.5074e-01,
         5.3661e-01,  6.2490e-02, -6.6968e-01,  8.2886e-01, -3.8374e-01,
         4.0274e-01, -1.8995e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4836e-01,  2.6018e+01, -2.1229e-01, -1.9657e-01, -8.3648e-02,
        -3.0701e-02, -7.7180e-02, -9.9330e-02,  5.4187e-03,  4.6170e-02,
        -2.8683e-02, -1.1068e-02,  3.4284e-02, -7.0195e-03,  1.4572e-02,
        -8.1299e-03, -1.2541e-01,  3.0892e-01, -5.6490e-02,  1.2889e-02,
        -4.9934e-02, -1.5618e-02,  2.1165e-02, -3.6314e-02, -3.1655e-02,
        -7.6751e-03, -3.9540e-02, -2.1722e-01, -3.7869e-02,  1.5830e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8861e-02,  5.5079e+00, -2.0854e-01, -4.1970e-02, -2.3336e-02,
         1.3922e-03,  6.1285e-02, -1.2878e-02, -3.1420e-02,  3.3307e-02,
        -3.1492e-02,  3.5785e-02, -2.0277e-02, -2.1495e-02, -7.2337e-03,
        -4.5037e-02,  3.4474e-02,  3.7015e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1213e+00,  2.0500e+02,  7.8214e+00,  3.2705e-01, -7.5765e-01,
        -2.2665e+00, -5.5067e-01, -5.0438e-01, -1.2067e+00,  4.0641e-01,
         4.6636e-02,  2.1446e-01,  2.6405e-01, -3.4601e-01, -6.5526e-01,
        -1.3290e+00,  2.6801e-01, -5.1188e-01, -6.5735e-01, -3.8582e-01,
        -3.3874e-01, -1.2418e+00,  9.0804e-01,  1.4113e-01,  2.0499e-01,
         1.6087e-01,  5.2948e-01,  1.7498e-01, -7.3384e-01,  1.8627e-01,
         1.8336e-01, -5.5769e-01,  4.4518e-01,  3.1337e-01, -2.5223e-01,
        -2.1567e+00,  7.1981e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8695e-02,  5.8290e-01,  2.3781e-04, -2.4492e-04, -1.8721e-03,
         3.1701e-03, -6.5083e-05,  4.1760e-03,  8.8029e-04,  3.5935e-04,
         1.7248e-03,  6.7176e-03, -6.8952e-03,  1.7162e-03,  6.3772e-03,
         3.5171e-04,  4.9304e-05,  1.9478e-03, -1.8532e-04,  7.8958e-05,
         1.3424e-03, -6.2162e-04,  4.8004e-04,  3.3580e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6513e+00, -1.9493e+02, -5.6149e-01,  6.3457e-01,  3.1239e-01,
         5.6552e-01,  8.3363e-01, -2.3103e-01,  3.3487e-01,  4.3738e-01,
         8.8356e-01,  4.4118e-01,  1.0744e+00, -1.5477e-01,  2.4535e-01,
        -5.0068e-02,  4.3967e-01,  1.3714e-01,  3.5242e-01,  3.0619e-01,
         3.3933e-01,  9.4313e-01,  5.0417e-01, -4.0822e-01,  8.9086e-02,
        -1.3517e+00, -4.5913e-03,  5.3276e-02,  1.2344e-01,  5.4728e-01,
         6.8155e-03,  1.2654e+00,  4.7165e-01, -1.6491e-01, -1.6667e-02,
        -2.0139e-01,  2.8440e-01,  1.5988e-01, -1.4768e-01, -4.4986e-01,
         5.0356e-01,  1.5929e-01,  2.4623e-03, -5.7600e-01,  3.1378e-01,
         1.2130e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7127e-02,  2.1113e+01, -2.6056e-01, -3.8496e-01, -6.9412e-01,
         6.0794e-02, -2.2982e-01,  5.7593e-03,  1.0714e-02, -9.4202e-02,
        -9.7271e-02,  1.0787e-02,  1.4262e-01,  1.5615e-02,  5.2000e-03,
        -5.8817e-02, -1.0710e-01,  2.8092e-01,  2.6533e-02,  7.2411e-02,
         2.3510e-01,  2.6375e-01,  2.1351e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([  -2.5839, -103.1610,    1.0206,   -0.3550,   -0.4648,    0.3882,
          -0.1937,    0.2162,   -0.3890,   -1.1763,    0.3889,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3063e+00,  1.7155e+02,  2.4163e+00, -3.7465e-01,  9.0498e-01,
         5.5207e-01,  1.0352e+00, -5.1034e-01,  9.8384e-01, -2.2282e-02,
        -5.4307e-01,  1.2059e-02,  4.8275e-01,  1.2792e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6422, 56.1303, -0.2143, -0.5807, -0.3556, -0.1767, -0.5479, -0.0608,
        -0.1825, -0.3006, -1.3836,  2.0295,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1583e-01,  4.8707e+01,  5.0651e-01,  2.0757e-01, -1.3877e-01,
         3.5580e-02,  1.7220e-01, -2.9951e-03,  1.2476e-01,  1.3846e-01,
        -9.6757e-02, -9.6077e-02,  6.0585e-02, -4.1190e-02, -1.4561e-01,
        -1.2881e-01, -1.1695e-02, -1.6143e-01, -3.3037e-02, -9.6204e-02,
        -7.1267e-02,  3.4456e-03,  1.9670e-01,  5.9341e-02,  1.6567e-02,
         6.7828e-02,  4.8925e-01, -5.9196e-02, -5.7502e-02, -1.2360e-01,
        -5.0977e-02,  2.7938e-01,  2.6509e-02,  7.9358e-02,  1.7615e-02,
         1.9914e-02, -1.4829e-02,  8.7817e-02,  5.8870e-02, -7.9386e-02,
         6.6702e-02, -3.0353e-02,  5.4587e-02,  1.4113e-02, -3.6370e-02,
         1.5907e-02, -3.1694e-02, -4.1993e-03,  3.0737e-02, -9.1390e-02,
        -8.6257e-03, -2.5836e-02, -6.9084e-02,  1.9825e-01,  3.3960e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.4774e-01, -1.6343e+02, -3.0971e-01,  6.1470e-01,  4.1561e-01,
        -1.7781e-01,  3.0949e-01,  1.2702e+00,  2.9986e-01,  2.0213e-02,
         1.0430e+00, -5.4979e-01, -9.9379e-01, -3.8237e-01, -1.2668e-01,
        -4.5771e-01,  3.7265e-01, -4.6414e-01, -2.0563e-01, -2.5832e-02,
         2.0825e+00, -4.0978e-01, -1.1918e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7032e+00,  8.9308e+01,  1.3995e+00, -1.3683e+00,  6.9754e-01,
        -1.7770e-01, -6.0022e-01, -3.5879e-01, -6.4548e-01, -7.3355e-01,
        -7.6880e-01, -3.7901e-01, -8.1268e-01, -2.0300e-01,  2.6486e-02,
        -5.9707e-01, -7.9811e-01,  3.9957e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7595e-03,  4.0004e+01, -3.6502e-01, -2.6664e-01,  3.6031e-01,
         2.1524e-01, -1.0816e+00, -1.5317e-01,  2.9769e-01,  2.7914e-01,
         2.9030e-01,  7.8440e-02,  7.3033e-01, -1.1027e-01,  1.9909e-01,
         1.8428e-01,  2.1033e-01,  2.3499e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1630e-01,  2.4578e+01,  5.7086e-02,  8.4776e-03, -7.9748e-03,
        -1.6953e-01,  5.6851e-02, -4.6897e-01,  2.5908e-01,  1.0801e-01,
        -6.5483e-02,  3.8825e-01,  2.1741e-01,  2.7015e-01, -9.7888e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3228e-01,  1.9261e+01,  2.6791e-01,  8.6080e-02, -9.7600e-02,
        -4.0771e-02,  1.8911e-03,  6.2317e-02,  1.0185e-01,  7.1690e-02,
        -1.3065e-01,  2.0536e-02,  8.0934e-02,  2.8468e-02, -2.2324e-02,
         1.1232e-01,  7.0823e-02,  5.8366e-02, -4.9762e-02,  2.0334e-01,
         7.7913e-02, -3.6011e-02, -3.2747e-02, -2.2136e-02, -7.3886e-02,
        -7.2577e-02,  3.5587e-02,  2.0932e-02,  6.3258e-02,  1.3308e-01,
         1.4527e-01,  1.3273e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1567e+00, -1.6783e+02, -1.0794e+00, -1.0612e-02, -1.4671e-01,
        -5.1590e-01,  1.0812e+00,  2.4052e-02,  1.0820e+00,  6.3185e-01,
         9.5872e-01,  5.5213e-01, -3.4479e-01,  2.4852e-01,  9.2956e-02,
         1.5514e-01,  4.6692e-01,  1.7009e-01,  8.1928e-01,  2.2873e-01,
         8.5612e-02,  8.7166e-02,  5.5387e-01, -1.9826e-01,  5.6167e-02,
         1.2773e-01, -3.5652e-02, -1.9045e+00,  6.8482e-01, -9.8368e-02,
        -2.2591e-01, -1.7757e-01, -1.9360e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2876e-01,  1.4493e+02, -2.9352e-01,  9.7453e-01,  2.4534e-01,
         1.2707e-01, -1.0731e-01, -1.1278e+00, -1.0990e-01, -1.0222e-01,
        -2.4939e-01, -1.1199e+00, -9.7489e-01, -2.0092e-01, -3.6940e-01,
         7.8479e-03, -5.1909e-01, -2.6469e-01, -5.1903e-01, -1.6397e-01,
         2.5588e-02,  3.8409e-01, -3.8715e-02, -4.0648e-01,  3.4102e-01,
        -9.8298e-02,  1.6414e-01, -3.1437e-01,  1.8646e-01,  1.2386e-01,
         2.0161e-01, -5.1683e-02, -6.3972e-02, -8.8311e-02,  8.4949e-02,
        -4.0947e-01, -3.9642e-01, -2.4431e-01, -2.5353e-02,  4.2926e-02,
        -5.9339e-02, -3.0696e-01,  8.8034e-02,  5.1899e-01,  1.6309e-02,
         2.4258e-01, -1.8378e-01, -1.2005e-01,  1.3319e-02, -6.8964e-02,
         1.0588e-01,  9.0039e-02, -2.5852e-01, -2.2107e-02,  2.5329e-01,
         2.7534e-01, -1.7258e-01,  3.1118e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8344e-01,  1.5927e+01,  5.5684e-03,  3.3339e-03, -1.2649e-01,
        -5.8268e-02, -7.0070e-02, -4.5890e-02, -2.0521e-02,  1.9639e-02,
         5.4626e-02, -3.2008e-02, -1.4387e-01, -1.6284e-01, -4.1542e-02,
        -6.7896e-02, -3.2236e-02,  1.6573e-02, -6.9813e-02,  7.2406e-02,
         2.0160e-02, -1.2920e-03, -4.6811e-02, -8.8785e-03, -1.3566e-01,
        -6.8931e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3751e-02,  3.1519e+00, -1.0223e-01, -3.2550e-02, -3.3534e-02,
         2.1134e-02, -7.4556e-03, -7.7316e-03, -1.6352e-02, -2.7005e-02,
        -1.3184e-02, -5.8612e-03, -2.5164e-03,  1.0863e-02, -8.6754e-04,
        -4.0064e-03, -1.8421e-02, -1.3008e-03, -6.3773e-03,  2.2570e-03,
         3.6346e-03, -6.9830e-03, -1.8810e-03,  1.8748e-02, -4.8955e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4183e-01, -3.5267e+01, -3.7134e-01, -1.4594e-01,  2.6945e-01,
         1.2248e-01,  1.2809e-01,  2.6741e-02, -1.1077e-01,  2.1620e-02,
        -1.2816e-02, -1.3709e-01, -2.9888e-01, -2.8130e-01,  8.8083e-02,
         1.1795e-01,  4.9380e-03,  2.4065e-03, -2.7056e-02, -3.8319e-02,
         8.3253e-02, -2.5958e-02, -3.5037e-03,  1.2718e-01,  1.0459e-02,
         1.0091e-01,  1.4925e-02, -3.6223e-02,  2.4680e-02,  8.6501e-02,
        -3.5039e-03, -5.9779e-03, -2.0189e-01,  7.1991e-02,  2.1936e-01,
         5.8516e-02,  2.8166e-02,  8.7333e-02, -1.5254e-02, -3.7770e-02,
        -1.0527e-02,  7.9738e-02,  6.3807e-02,  8.6952e-02,  2.8626e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3825e-02,  1.9883e+00, -1.8489e-02, -6.5206e-03, -5.6935e-03,
        -2.8756e-03, -1.1585e-03, -1.0776e-02, -1.9588e-02, -5.0409e-03,
         5.1589e-03,  1.0839e-03, -3.4142e-03,  6.7930e-05,  4.1413e-03,
        -3.4946e-03,  4.2653e-03,  9.6443e-03,  1.3763e-02,  3.0056e-03,
        -9.8386e-03,  1.5259e-02,  1.4732e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0057,  0.3614, -0.0045, -0.0019,  0.0015,  0.0074,  0.0067,  0.0026,
        -0.0063, -0.0046,  0.0015,  0.0035,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.4703e+00,  1.4857e+02,  1.0341e+00, -2.0291e+00, -1.2413e+00,
        -4.5786e-02, -9.3290e-01, -6.9841e-01,  4.5504e-01, -8.8253e-01,
        -7.7239e-01, -3.4853e-01, -2.7262e-01, -5.5752e-01, -2.7779e-01,
        -6.7698e-01,  5.5359e-01, -1.1317e-01,  1.3171e-02, -1.1125e+00,
        -9.1597e-01, -2.1053e-01, -9.7887e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4745e-01,  5.4739e+01,  2.3972e+00,  3.2413e-02, -3.1744e-01,
        -2.1746e-01,  1.0812e-01,  1.4796e-01, -2.4215e-01,  2.9785e-01,
         1.1940e-01,  3.9596e-03,  7.8563e-04,  1.0929e-01,  1.8120e-01,
        -1.0379e-01, -2.2107e-01, -2.2293e-01,  5.9205e-03,  1.4498e-01,
         1.0413e-01, -3.3541e-02, -1.5824e-01, -7.5573e-02,  2.4525e-02,
        -1.6345e-01, -2.0184e-01,  2.2897e-02, -1.1866e-01, -8.9728e-02,
         9.9582e-02,  3.5405e-01, -4.6819e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1987e+00,  6.8036e+01,  1.8694e+00, -1.0942e+00, -3.5820e-01,
        -2.8080e-03, -5.2377e-01, -1.5263e-01,  1.9729e-01, -7.9158e-01,
         3.7733e-01, -5.4639e-01,  3.6633e-01, -2.2409e-01, -1.2520e-02,
        -7.9844e-03, -1.9538e-01,  3.1873e-01, -2.8673e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0724e-02,  5.0528e+00, -1.0063e-02,  5.0166e-02,  1.1146e-02,
        -1.9900e-02,  6.5046e-02, -4.3321e-03, -1.9014e-02,  9.2003e-03,
        -3.0671e-02, -1.9480e-02,  2.0703e-02,  4.5792e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3648e+00,  1.2534e+02,  2.2549e+00, -4.7776e-01,  1.5086e+00,
         5.1682e-01,  7.4443e-01,  5.3305e-01,  9.8721e-01,  1.2025e-01,
        -1.1810e-01, -8.6598e-02,  7.2503e-01, -7.5512e-02,  9.2871e-02,
         2.2460e-01,  4.6821e-01,  3.0753e-01,  2.5573e-01,  3.8168e-02,
        -1.1721e-01,  1.1523e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6654e+00, -6.3272e+01,  1.0426e+00,  1.8210e-02, -5.6329e-01,
         3.1422e-01, -8.1386e-02,  1.7197e-01, -1.1885e-01, -3.0208e-01,
        -3.2439e-02, -3.3637e-01, -4.4484e-01, -4.5100e-02, -5.2654e-02,
         1.0081e-01, -1.0969e-02, -1.3372e-01,  1.0704e-01,  1.4828e-01,
         1.2725e-01, -1.0661e-01, -1.2282e-01,  8.8089e-02,  2.6235e-01,
        -3.2627e-02, -6.5244e-02, -8.9893e-02, -5.3683e-01, -5.2803e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1574e-02,  1.5020e+00,  5.4700e-03, -1.6036e-02, -1.2025e-02,
         2.1203e-02, -1.0269e-02, -7.7031e-03,  6.0456e-03,  4.2538e-04,
        -5.2138e-03, -6.0928e-03, -1.1967e-03, -2.1878e-03,  3.1551e-03,
        -2.1657e-02, -3.7044e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6565e-01,  9.9793e+00,  1.7508e-01, -8.6176e-02, -2.1748e-03,
         8.9043e-02, -2.0263e-02,  1.7431e-01, -2.2414e-01, -6.3353e-02,
         8.8131e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0338e-01, -1.1745e+02, -1.5271e+00,  1.3795e+00, -2.1288e-02,
        -1.1016e+00,  5.8897e-01, -1.5244e-01, -1.0014e-03,  1.5821e-01,
         5.8590e-01,  2.2643e-01,  4.5166e-01, -1.7372e+00, -1.6054e+00,
         8.0490e-01,  9.6969e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0298e-01,  1.3114e+02,  1.4993e+00,  3.1548e+00,  7.7013e-01,
         5.1912e-01,  6.9917e-01, -2.1334e-01,  4.5513e-01,  3.1452e-02,
        -1.8668e-01, -5.2841e-02,  9.0380e-02,  5.6021e-01,  2.1140e+00,
         3.5313e-01,  1.4230e-01, -2.7352e-01, -4.5507e-01,  8.5994e-02,
        -5.7986e-02,  4.5815e-01,  9.8883e-01,  4.7026e-01,  7.1323e-02,
         6.1905e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9642e+00,  5.5779e+01,  7.5692e-02, -4.3149e-01, -1.6034e-01,
         4.9474e-01,  4.9875e-02,  1.4859e-02, -9.5071e-02,  1.1964e-02,
         3.9555e-04, -5.4663e-02, -3.9797e-02, -2.1990e-02, -1.4015e-02,
        -1.8078e-02, -1.1271e-01,  3.6099e-02, -4.3299e-02, -1.3525e-01,
        -1.0289e-01, -1.0519e-01, -1.4075e-01,  3.0448e-01,  1.3238e-02,
        -2.2200e-01,  2.7545e-02, -1.3303e-01, -3.6365e-01, -1.4831e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0067e-04,  1.6517e+00,  2.1514e-02, -7.7443e-03,  1.5641e-02,
        -1.1586e-02, -4.1741e-03, -8.2120e-03,  7.4309e-03,  1.6862e-03,
         5.3232e-03,  2.3532e-03,  3.9470e-04, -4.5658e-03, -2.5589e-03,
         1.7680e-03, -3.7444e-03, -3.2342e-03,  2.3297e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-8.7234e-03, -7.5718e+01,  1.2407e+00, -6.8013e-01,  6.6576e-01,
         5.0463e-01, -6.8922e-01,  5.8848e-01,  2.5509e-01, -4.6342e-01,
         8.6941e-02, -6.4408e-02,  3.0920e-01, -1.7704e-01,  4.6820e-01,
         2.1228e-01, -4.6373e-01, -7.7827e-01,  2.5577e-01, -1.9075e-01,
         6.8293e-01, -1.3808e-01,  2.2778e-01,  5.0842e-01, -3.0816e-01,
        -6.0993e-01, -2.9140e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9594e-02,  2.5035e+00,  1.6487e-02, -3.2896e-02,  5.3319e-03,
         1.5876e-02,  9.3597e-03,  1.0934e-02,  2.4992e-02, -2.0496e-03,
        -3.8697e-03,  1.3737e-02,  1.9029e-03,  5.9139e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7272e-01,  8.3829e+00,  7.8751e-02, -1.5652e-02,  5.4496e-03,
         4.4937e-02,  9.1558e-03,  9.5520e-03,  5.6154e-02,  1.9600e-02,
         6.2817e-03,  3.0308e-02, -1.5474e-02,  5.5064e-02, -1.2960e-02,
        -6.5818e-02,  1.4473e-02, -6.3511e-02,  8.6093e-02,  3.3387e-02,
        -3.7522e-02, -3.4058e-02, -3.7764e-02, -2.1627e-02,  1.5872e-02,
        -7.9283e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3930e+00, -1.6417e+02, -9.8185e-01,  9.9178e-01,  1.4167e+00,
         3.2473e+00, -5.9367e-01,  1.1776e+00,  4.8533e-01, -3.5833e+00,
        -4.3385e-02, -6.5107e-01,  2.6762e-01,  2.3312e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0386e-01,  6.0586e+00,  6.6773e-02,  8.2007e-03, -6.1688e-03,
        -1.1150e-02,  5.4469e-02, -1.2711e-02,  2.2491e-02,  2.3904e-03,
         1.2887e-02,  1.8109e-02,  1.4196e-02,  1.1001e-02,  3.7677e-02,
        -5.8269e-03, -2.0065e-02,  2.3853e-03, -8.2581e-03, -1.8975e-02,
        -1.5701e-02, -7.2149e-03, -1.7858e-02, -4.1734e-02,  4.4326e-03,
        -5.6393e-03, -7.5011e-02,  3.3162e-03,  6.7869e-03, -6.4529e-03,
         3.2146e-02, -2.0691e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3057e+00, -3.4603e+01,  4.6596e-01, -3.0912e-01,  1.4837e-01,
         1.0038e-01,  2.6592e-01,  3.9583e-01,  1.8804e-01,  9.9728e-02,
         3.1995e-02, -1.3372e-01,  1.4881e-02, -1.1689e-04, -7.1183e-02,
         1.6990e-01,  2.4319e-02,  1.3660e-01, -1.2961e-01, -6.0741e-02,
         1.7655e-02, -8.2033e-02,  3.8036e-04,  9.9366e-02,  8.4740e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6513e-01,  1.7054e+01,  3.1693e-01,  1.2171e-02,  5.9773e-02,
        -1.9909e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0295e-01,  1.9890e+01,  1.3209e-01, -2.2032e-02,  1.6907e-01,
         7.2853e-03,  8.7306e-02, -3.2329e-02, -1.4414e-02,  5.2046e-02,
        -5.0315e-02,  6.5851e-02,  2.4847e-02,  7.8676e-02,  1.5697e-03,
        -5.1705e-02,  5.5927e-02,  6.3534e-02,  3.0852e-02, -7.4842e-03,
        -8.1308e-02,  2.7602e-02, -6.7335e-02,  1.6568e-02,  3.5377e-02,
         2.8317e-02, -1.4218e-02, -2.0732e-02, -3.4636e-02,  1.4796e-02,
         2.2486e-02,  1.0848e-02, -8.2050e-03, -5.7940e-02, -3.0866e-02,
         9.6098e-03, -7.7819e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8454e-01, -7.8650e+00, -3.7032e-02, -2.1032e-01, -3.9082e-03,
        -7.8028e-03, -1.4596e-02, -5.3769e-02,  1.1493e-02, -1.0009e-02,
        -3.4335e-04, -3.3602e-02, -2.8117e-02, -2.4011e-02, -8.2314e-03,
         5.8573e-02,  2.5749e-02, -6.6807e-02, -4.8307e-03, -2.8669e-02,
         6.0536e-03,  6.2626e-03,  6.2695e-03,  6.2869e-03,  9.5266e-03,
         2.0559e-02,  4.8246e-02,  7.9431e-02,  1.6214e-02, -4.5491e-02,
         2.7628e-03, -1.0920e-02, -1.5507e-02,  1.9697e-02,  4.7131e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7011e-01,  1.0144e+02, -3.6934e+00, -4.4051e-03, -2.1600e-01,
         7.4023e-02, -2.5595e-01,  5.6184e-02,  6.3479e-01,  1.5448e+00,
         1.0612e+00, -1.2119e+00, -1.1330e-01, -1.4780e-01,  3.2386e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8451e-01, -6.6196e+00, -4.3716e-02, -2.2459e-03, -2.0776e-02,
        -1.2670e-02,  1.4433e-02,  2.1681e-02, -4.7844e-03, -1.1833e-02,
         7.3664e-03,  2.1315e-03, -3.1901e-02, -5.5766e-03,  4.0838e-03,
        -3.5660e-02, -1.8608e-02, -1.1818e-03, -3.3548e-04, -8.9161e-03,
        -6.6473e-03, -1.9350e-02, -1.2929e-02, -1.7940e-02,  8.1636e-04,
         3.4582e-03, -1.3437e-02,  3.8179e-03, -2.4145e-02, -6.6388e-03,
        -8.8118e-03, -4.7281e-03, -5.6146e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3331e-01, -4.0548e+01, -6.3139e-01, -3.5590e-02,  8.2341e-02,
         3.8963e-01,  1.9907e-01,  5.7798e-02,  5.3338e-01,  1.0320e-01,
         1.2801e-01,  2.4660e-01,  1.7678e-01, -2.6818e-01,  4.7570e-02,
         8.9209e-02, -5.4742e-02,  4.0268e-02,  4.7348e-02,  1.5374e-02,
         9.0889e-02,  1.6252e-01,  7.9183e-02,  9.5110e-02, -1.3872e-01,
        -1.2121e-01, -6.5220e-02, -1.0661e-01,  2.3258e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1844e+00,  1.7807e+02,  1.0676e+00, -9.4045e-01,  3.2043e-01,
        -1.2093e+00, -5.9282e-01,  3.4465e-01, -2.4416e-01, -9.5676e-01,
         2.7508e-02,  1.2181e+00,  6.8593e-01,  6.2577e-01, -2.5633e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7652e-01,  2.9978e+01,  8.2289e-02, -6.8073e-02, -8.2492e-02,
        -1.8752e-02,  9.5583e-02,  1.3461e-01,  1.9010e-01,  5.0460e-02,
        -5.0382e-02, -8.5074e-02, -3.4625e-02,  6.5017e-02,  5.8951e-02,
        -1.4001e-01, -4.3309e-03,  1.2996e-02,  1.6905e-04,  1.7172e-02,
         4.6895e-03, -3.8426e-02, -5.9636e-02,  1.0317e-02,  1.0607e-01,
         3.4626e-02, -4.5804e-02,  1.3084e-02, -4.1027e-02, -1.8747e-02,
         1.5866e-02, -7.1634e-02, -1.0383e-02,  1.4528e-02,  2.0528e-02,
         3.2490e-02, -1.3025e-04,  1.3160e-02,  5.4894e-02, -7.0468e-02,
        -6.0062e-03,  6.6019e-02, -3.1759e-02, -2.7215e-02,  4.4813e-03,
         4.3331e-03, -3.3814e-02, -3.6878e-02,  6.3526e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5605e+00, -5.5296e+01,  2.4861e+00, -5.4492e-01, -3.0897e-01,
        -5.0656e-01, -2.4231e-01,  3.8652e-01, -7.4301e-02, -2.8414e-01,
         1.7203e-01, -1.4069e-01, -4.4008e-02, -9.2199e-02, -1.9467e-02,
         9.0925e-02, -2.8018e-01, -9.1007e-02,  1.5748e-01, -2.6812e-01,
         1.2429e-01, -1.7058e-01,  1.8524e-03, -2.1401e-01, -1.0312e-01,
        -3.6232e-02,  8.6753e-02,  7.3472e-02, -1.1417e-01,  3.7846e-01,
        -9.4463e-02,  9.6461e-03, -1.4254e-01, -5.0837e-01,  4.6064e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5844e+00,  1.8655e+02, -1.9160e+00,  3.0091e-01,  4.9602e-01,
         1.4828e+00, -2.0311e+00,  3.8662e-01, -4.4085e-01, -5.8345e-01,
         3.1899e-01,  2.4990e-01, -6.3516e-02, -7.7650e-01,  2.7488e-01,
        -7.3687e-01,  6.0631e-01, -5.4951e+00,  5.0757e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -1.3727, 204.7123,   1.2418,   0.6201,   0.8322,   0.8508,  -0.2437,
         -0.5919,   0.7510,  -0.6781,  -2.8589,   0.2340,  -3.0898,  -4.1472,
         -2.5283,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3799e-01,  7.6429e+01, -3.5625e-01,  2.6323e-01,  1.0170e-01,
         2.0404e-01,  7.0583e-02,  2.6985e-01,  4.0073e-02, -4.0776e-01,
        -2.7458e-02,  1.8317e-01, -3.8756e-01, -6.0097e-01, -1.9797e-01,
        -2.5731e-01,  9.8304e-02, -9.5111e-02,  2.2262e-03, -2.9097e-01,
        -7.1596e-01, -9.6580e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5564e+00, -9.7308e+01,  2.2016e+00,  4.1513e-01,  2.2850e-01,
         5.0166e-01,  4.0920e-01, -1.4247e-01,  7.6365e-01,  6.7846e-01,
         5.3024e-01,  4.1949e-03, -2.3934e-01,  5.5583e-01, -2.4126e-02,
        -2.9122e-02,  7.0247e-01, -1.5333e-01, -8.9185e-02, -1.5304e-01,
         3.5579e-01,  5.7606e-02, -6.4989e-02, -1.4512e-01, -5.4977e-01,
         4.5933e-02,  2.4654e-01,  1.2984e-01, -5.0597e-01,  6.1748e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4770e-01,  7.6139e+01, -2.0965e+00, -7.5718e-01, -5.0064e-01,
        -3.1230e-01,  2.7717e-03, -4.9976e-01,  7.9101e-01,  1.0249e+00,
        -9.4589e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4902e+00, -1.9508e+02, -2.1664e+00,  2.1798e-01, -1.0904e-01,
        -8.9758e-01, -7.9769e-02,  8.5057e-02, -1.1774e+00, -1.1670e-01,
         5.4258e-01, -6.4374e-01, -9.9973e-02, -6.4986e-01,  2.9244e-01,
        -6.8563e-01,  4.1895e-01,  2.4665e-01, -2.9410e-01,  4.4921e-01,
        -2.4174e+00,  6.0577e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5199e-01, -7.1232e+01,  2.9473e-01,  2.0945e-01,  1.2957e-01,
        -6.2916e-01, -2.9641e-01, -3.5320e-01,  5.3362e-01, -1.3023e-01,
         5.4898e-03, -2.3637e-03, -2.4766e-01,  3.4939e-01,  8.6291e-02,
         8.5322e-02, -3.4160e-01, -1.5415e-01,  6.0467e-01, -1.3545e-01,
         5.5903e-02,  1.7161e-01,  7.7181e-03,  2.9928e-02, -1.1897e-01,
         8.9805e-03,  6.6508e-02, -2.2870e-01,  1.7995e-01,  3.5584e-01,
        -3.5912e-01, -4.5367e-01,  1.6594e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2970e+00,  1.6476e+02,  5.3099e+00, -4.7163e-01,  6.3777e-01,
         1.1417e+00,  2.5640e-01, -2.4038e+00,  4.1373e-01, -1.0156e+00,
         1.2929e-01, -3.8509e-01,  4.0113e-02,  5.8072e-02,  3.5065e-01,
         2.5589e-01,  9.4170e-01, -2.0308e-01, -1.7990e-02, -4.0365e-01,
         9.0143e-01, -2.1864e+00,  1.2939e+00,  2.1932e-01,  1.6510e-01,
         1.1458e-01,  6.0874e-01,  8.5623e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7106e-01, -4.5762e+01, -2.3941e+00, -9.4596e-02, -4.6991e-02,
         2.3090e-01,  2.9394e-01, -5.2575e-01,  2.2606e-01,  4.8641e-01,
         1.2962e-01, -1.8498e-01, -4.2433e-02, -6.7105e-01, -1.3816e-01,
        -4.1002e-01, -8.9792e-02,  1.1652e-01,  8.5341e-03,  8.3454e-02,
         7.2772e-02,  7.6521e-02,  1.5624e-01,  5.2337e-01,  1.5364e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 2.1215e-01,  1.1602e+01,  2.3386e-01,  1.8861e-02,  1.9638e-03,
        -6.5858e-02, -1.1906e-04, -6.2166e-03,  2.4836e-02,  9.8731e-03,
         4.9522e-02,  8.3584e-04, -2.9285e-02,  4.7809e-02,  9.0123e-03,
        -4.1117e-02, -2.7018e-02,  1.2554e-02,  9.5869e-03, -2.2144e-04,
        -5.1465e-04, -1.3988e-02, -3.2140e-02, -1.8652e-02,  1.8118e-02,
         2.3974e-04, -1.8417e-02, -4.0210e-02, -2.3307e-03,  5.9898e-02,
        -5.6294e-02, -3.9575e-02,  1.4283e-03, -1.7692e-02,  1.2000e-02,
        -2.5861e-02, -3.2370e-02, -1.5666e-02,  9.7228e-03, -1.4518e-02,
         2.4472e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7037e-01,  1.1446e+01, -6.9844e-03, -6.8303e-03,  9.9594e-02,
         3.5983e-02, -3.4390e-02,  1.4928e-02, -2.2261e-02,  2.9021e-02,
         3.2120e-02,  6.7100e-03,  7.6605e-02,  1.0205e-04,  7.4791e-02,
        -6.3748e-03,  4.9625e-02, -2.7850e-04, -1.7746e-03, -1.4890e-01,
        -5.5859e-02,  2.5651e-02,  3.0076e-03, -2.1833e-02,  1.4684e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8101e+00, -1.8011e+02, -2.6735e+00,  2.1185e+00, -2.3186e-02,
        -1.4686e+00, -3.7420e-01, -2.0550e+00,  7.9903e-01, -3.2296e-02,
        -1.0377e+00, -6.4411e-01, -4.1242e-01, -6.2446e-01,  1.5918e-01,
        -1.0749e-01, -3.5990e-01, -1.7018e-01,  9.5607e-01, -1.3565e+00,
         3.1005e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5482e-01,  4.3610e+00,  3.5999e-02, -3.9035e-02, -2.5091e-02,
        -1.5521e-02, -2.5372e-02, -4.3842e-02, -2.0259e-02, -4.3090e-02,
        -4.4521e-02, -2.2008e-02, -2.1264e-03, -5.0640e-02, -1.3557e-02,
        -4.3123e-02, -4.0035e-02, -9.6160e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8088e-02,  3.0129e+00,  1.3649e-02,  6.0501e-03, -2.9477e-03,
         2.5407e-02,  1.7449e-02,  1.1334e-02, -1.4002e-02,  1.1897e-02,
         9.7050e-03, -6.1527e-03,  9.9795e-03,  1.9819e-02,  5.3855e-03,
         1.1867e-02, -3.8955e-03, -7.2501e-03,  1.3605e-02, -6.2350e-03,
         2.4847e-04, -2.2141e-03,  2.3340e-03,  1.7199e-03, -8.5199e-04,
        -1.3799e-03, -3.1668e-03, -4.0957e-03,  4.9819e-03,  6.0834e-04,
         5.4677e-03,  8.8756e-03, -4.8153e-03, -1.7212e-03,  1.9196e-02,
         1.7648e-02,  2.2839e-04,  1.7127e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3675e-02, -1.3118e+01,  1.3563e-01,  8.4292e-02, -1.1907e-01,
         6.1936e-03,  2.2040e-02, -9.1819e-02, -1.5044e-02, -5.0363e-02,
         3.2299e-02,  6.1862e-02, -5.3516e-03,  2.3705e-02, -9.1538e-02,
        -2.3971e-02, -5.8784e-02,  2.8334e-01,  2.0273e-02, -1.2038e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0199, 70.0382, -0.5323,  0.6164,  0.2857, -0.2134,  0.2824, -0.1166,
         0.5309,  0.8577,  1.4536,  0.2745,  0.3931, -0.6863,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4294e-02,  1.2861e+00,  5.2488e-03,  9.7271e-03,  5.1103e-03,
        -5.9129e-04,  4.3899e-03,  3.0234e-03,  5.8632e-03,  7.5097e-03,
        -1.6423e-03,  5.5431e-03,  1.0082e-03,  2.5111e-03,  6.8649e-03,
         3.5140e-03,  5.3987e-03,  3.9850e-03,  5.5462e-03,  1.9852e-03,
        -4.7760e-03,  6.0977e-03,  5.8230e-03,  3.8989e-03,  4.8225e-03,
        -4.7394e-03,  2.2102e-03,  5.2955e-03, -3.3845e-04,  2.7105e-04,
         2.3916e-03, -3.6889e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1877e+00, -7.6271e+01, -9.9166e-01, -1.2043e+00, -6.1778e-01,
        -6.3742e-01, -4.8426e-01,  1.8666e-01, -1.6914e-01,  3.3255e-01,
        -7.3231e-02,  1.1122e+00, -1.3573e-01, -5.3885e-02, -1.1771e-01,
        -7.4556e-02, -7.4945e-02,  3.3818e-01,  6.2723e-01, -1.2739e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1974e-02,  8.1187e-01,  2.6496e-02, -8.6925e-03, -1.0508e-03,
         4.0815e-03, -1.8401e-03, -2.6665e-03,  1.0244e-03,  6.2243e-04,
         4.2880e-04,  1.9978e-03, -4.0948e-04, -1.2247e-03, -3.2991e-04,
        -3.9974e-03,  1.1567e-03, -2.5925e-03,  1.8255e-03, -1.7879e-03,
         3.0002e-03,  3.8049e-03, -6.3109e-04,  1.0754e-03,  1.7294e-03,
        -1.0369e-03, -6.0687e-03, -3.3614e-03,  7.4237e-04, -1.2536e-04,
         2.5600e-03,  5.0155e-04,  2.0131e-04,  5.4196e-03,  1.1679e-03,
         1.3774e-03, -7.3300e-04,  3.8980e-04,  1.7860e-03,  9.6032e-04,
        -2.6229e-03, -1.2769e-03, -7.6009e-04,  4.7892e-04,  5.9877e-05,
        -8.9394e-04,  1.5166e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8846e-02,  6.0889e+00,  5.8792e-03, -5.4759e-02,  1.3359e-01,
         2.4243e-02, -4.4026e-03,  1.3384e-02, -2.9704e-02, -2.0307e-02,
         8.1013e-03, -2.6994e-02,  5.1843e-03,  3.3352e-02, -6.8844e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0297e-02,  2.6004e+00, -3.2782e-03, -7.3149e-03, -2.1020e-02,
        -6.4495e-03, -2.5523e-03, -8.1450e-03, -1.8933e-02,  9.4559e-03,
         1.9718e-03, -8.9043e-03,  7.8144e-03,  3.5513e-03, -4.1721e-03,
        -1.2912e-02,  1.6324e-02,  4.5036e-03, -4.3573e-03, -4.0268e-03,
        -3.8296e-03, -9.4206e-03, -6.1130e-03,  1.3842e-02,  2.4337e-03,
        -4.0062e-03,  9.4170e-03, -1.1859e-02,  8.5082e-03,  5.7266e-03,
        -9.3757e-04, -3.6102e-03,  9.2977e-03,  5.9350e-04,  3.7947e-03,
         8.5349e-03, -9.0077e-04, -2.9168e-03, -8.8100e-03,  1.1359e-03,
        -3.3601e-03, -2.6397e-03,  9.8528e-04, -2.8436e-03, -4.7854e-03,
        -9.9711e-03, -7.1567e-05,  4.2151e-04], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-5.7091e-01,  1.7871e+02, -2.3971e+00, -1.4606e+00,  1.4646e+00,
        -1.0849e+00,  1.3308e+00, -4.0457e-02, -2.4255e+00, -4.6070e-01,
        -2.4115e-01,  5.0424e-02, -2.2619e-01,  1.3031e-01, -1.9296e-01,
        -2.1414e-01, -1.3803e+00, -6.2454e-01, -1.4932e-01, -2.3928e-01,
         2.7479e-02,  1.0813e-01, -1.3374e-01, -5.8773e-01, -2.2696e-01,
        -3.3448e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2401e+00, -2.4304e+02,  2.3743e-01, -3.1596e-01, -4.4586e-01,
         1.1312e-01, -1.5578e+00, -1.6938e+00,  7.0525e-01,  4.4027e-01,
        -1.2631e+00,  6.5523e-01, -8.9057e-01,  1.0293e-01,  9.7415e-01,
         7.9549e-01, -3.8442e+00,  1.1200e+00, -2.5889e+00, -5.6836e-01,
        -9.4043e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1182e-03,  1.7183e+00, -8.0934e-03, -9.9020e-03, -1.2111e-02,
         1.7071e-03,  3.7712e-04, -1.1084e-02, -5.9232e-03,  6.3942e-04,
        -2.0567e-03,  4.0010e-03, -2.8099e-04, -1.3022e-02,  5.7096e-03,
        -5.0141e-03,  2.5580e-03, -1.7421e-02, -3.5217e-03, -6.6855e-03,
         2.5583e-03, -1.4943e-03, -1.1935e-02,  4.2052e-03,  3.3053e-04,
        -1.1733e-02,  1.5734e-04,  1.7869e-03,  5.6498e-03,  1.2327e-03,
        -8.3590e-03,  5.3670e-03,  5.1230e-03, -1.5517e-03, -5.3612e-04,
         9.7686e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1717e-01,  3.4090e+01, -1.7184e-01,  2.4049e-01, -4.0591e-02,
        -1.5197e-01,  1.2065e-01,  1.7670e-01,  1.0995e-02,  1.4365e-01,
         8.8084e-02,  1.0182e-01, -1.0573e-01, -4.5078e-02, -7.3295e-02,
        -8.6686e-02, -7.3951e-02, -7.9556e-02,  5.4720e-01, -5.0748e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8146e-01, -3.1118e+01, -3.2691e-01, -1.9611e-01, -6.8414e-02,
         4.3590e-02, -6.5077e-02,  2.0802e-01, -1.5261e-01,  2.9683e-02,
         1.3086e-01,  1.7476e-01, -1.0776e-01, -8.3073e-04,  6.5616e-02,
        -3.4737e-02, -1.8702e-01,  2.5526e-01,  4.8930e-02,  3.9940e-03,
        -2.4783e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3746e+00, -4.8482e+01,  8.5448e-01, -3.0630e-01,  2.6653e-01,
        -7.9912e-02, -5.6955e-02,  7.6790e-01,  7.7312e-01,  1.3878e-02,
        -6.0819e-02,  6.6961e-01, -4.9009e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6456e+00,  3.6164e+01,  1.5937e+00,  3.9063e-01, -2.1039e-01,
         1.1280e-02,  3.0209e-01, -1.1911e-01,  2.0038e-02,  9.8379e-02,
         6.0857e-02, -6.4673e-02,  8.1038e-02, -8.0759e-02,  1.5472e-01,
        -1.5126e-02, -7.7743e-02,  3.9613e-02, -4.8664e-02, -1.9333e-01,
         8.4602e-02,  1.1491e-01,  2.9698e-02, -9.3392e-03,  1.1731e-01,
         7.6573e-02, -1.1858e-01,  3.6049e-02,  6.3170e-02,  8.7548e-02,
         1.1063e-01, -1.0136e-01,  9.5215e-02, -4.6443e-02,  1.3574e-01,
         3.0501e-01, -1.5571e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2945e+00, -1.5391e+02, -1.4647e+00,  1.5470e+00,  4.3428e-02,
         3.3961e-01, -6.5360e-01,  1.8725e+00,  3.8991e-01, -8.1607e-03,
         2.2141e-01, -2.7810e-01, -1.5068e-01,  6.3904e-01,  4.3150e-01,
        -7.8126e-01,  4.1577e-01,  2.4447e-01,  8.2170e-01,  6.9278e-01,
        -1.1284e-01,  4.3786e-01, -1.9393e+00, -6.2845e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2086e-01, -6.8311e+01, -1.7238e+00, -6.7799e-01, -8.3930e-02,
        -1.2823e-01, -1.3556e-01, -8.0539e-01, -2.5287e-01,  3.3884e-01,
         7.0778e-01, -3.3319e-01, -4.5682e-01, -2.0110e-01, -5.9572e-01,
        -8.1155e-02,  3.4829e-02,  9.9470e-02, -1.9914e-01,  5.8018e-02,
        -3.0710e-02,  2.3072e-01, -8.8121e-02,  2.7742e-01,  5.0908e-02,
         4.5283e-02,  8.7291e-02,  2.7189e-01,  3.6257e-02, -2.3292e-02,
        -1.5455e-01,  3.2174e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2607e-01,  1.2830e+01, -6.9010e-02, -1.1620e-01, -7.2391e-03,
        -1.6661e-01,  4.7307e-02, -8.2713e-02,  6.9538e-02,  5.0723e-02,
         3.7727e-02, -2.9728e-02, -1.2990e-02, -1.2730e-04,  2.8946e-02,
         8.5699e-04, -1.7298e-02, -2.4697e-02,  9.0418e-04,  7.3351e-02,
         1.2971e-01,  4.4241e-03, -2.7987e-02,  1.6687e-02, -1.5010e-02,
         1.0029e-01, -9.0374e-02, -3.6933e-02,  4.6412e-02,  6.9282e-02,
         4.8145e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8236e+00, -1.2721e+02, -8.5964e-01, -2.3535e-01, -4.4057e-02,
         3.2186e-01,  2.7968e-01,  3.0450e+00,  1.5410e+00,  2.7674e-01,
        -1.2003e+00, -2.6414e-01, -1.2713e+00,  1.3715e+00, -4.3063e-01,
         1.4536e-01, -2.1503e-01, -5.5369e-01, -9.4491e-01, -7.5943e-02,
         1.0407e-02, -1.3246e-01,  1.3675e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6835e-02,  5.6120e+01, -6.9353e-02,  2.3847e-02, -5.2478e-01,
         1.8579e-01, -1.0316e-01,  7.9077e-02, -1.2427e-02,  5.4391e-02,
        -5.4744e-03, -4.5251e-02, -3.6252e-02, -1.0144e-01, -2.6267e-01,
         1.5796e-02, -1.8015e-01, -7.0609e-02, -9.2768e-02, -3.1860e-02,
        -2.0768e-01, -2.8681e-01, -1.3562e-01, -3.6654e-02, -3.5386e-03,
         2.1781e-01,  1.1115e-01,  3.7327e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.8292e+00, -1.7683e+02, -5.6074e-01, -3.3950e-01, -4.7442e-01,
         1.9609e-01, -4.4578e-01,  6.4442e-01, -3.5632e-01,  3.9247e-01,
        -3.9301e-01,  4.6501e-02, -4.6525e-01,  1.0870e-01,  2.6367e-01,
        -6.7425e-01,  2.6055e-01, -7.4415e-01,  3.9235e-01,  1.4322e-01,
        -3.7828e-01,  3.1152e-01, -6.6433e-01,  2.1871e-01, -2.6783e-01,
        -5.2238e-01, -9.4376e-01,  1.1688e+00, -1.0009e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3160e+00, -2.0775e+02, -3.3700e+00,  7.5013e-01, -3.8762e-01,
        -3.6938e+00,  3.5265e-01, -6.5748e-01,  1.8833e-01,  1.8539e-01,
         3.6495e-03,  3.8273e-01, -2.4453e-01, -2.8869e-01, -6.2298e-01,
        -4.7363e-03,  5.9874e-01, -4.3459e-01, -2.2886e+00, -7.1167e-01,
        -2.8711e-01,  3.9219e+00,  1.0895e+00,  1.5000e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3154e-01, -1.7199e+01, -1.1679e-01, -7.1530e-02, -6.9175e-02,
        -3.2296e-01, -1.8255e-02, -1.3345e-02,  5.8553e-02, -7.8671e-02,
        -1.9855e-02,  1.6144e-01, -1.3522e-01, -8.5685e-02,  7.5407e-02,
        -1.4439e-02,  5.5723e-04, -2.3058e-04,  1.3540e-02, -1.6712e-02,
         6.3518e-02,  8.8007e-02, -7.1558e-03, -9.6119e-02, -1.1692e-02,
         4.1143e-02,  7.9338e-02,  4.9155e-02,  8.7262e-02, -1.1363e-01,
         4.0019e-02, -4.7404e-02,  5.3869e-02,  1.3314e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4286e-01,  1.2738e+02, -2.1486e+00,  1.9860e-01, -3.0677e-01,
        -6.2584e-02,  3.2918e-01,  5.6578e-02,  1.0635e-01, -1.9096e+00,
         3.9355e-01, -6.1977e-02, -3.0854e-01, -2.1278e-01,  1.2295e-01,
        -4.2109e-01, -1.0331e+00,  1.0875e-01, -1.9784e-01,  4.1828e-01,
         7.0691e-01,  1.1063e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8859e-01,  4.5454e+01,  4.2368e-01, -3.1964e-01,  5.5415e-01,
         2.8463e-02,  2.2512e-01,  2.9045e-01, -3.9040e-02, -1.3675e-01,
         6.6414e-01, -9.3775e-02,  1.3810e-01, -1.4042e-01, -1.4835e-01,
        -1.3100e-01,  3.9167e-01, -1.5891e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5050e-01,  1.1068e+02,  9.0717e-01,  2.1802e-01,  4.0182e-01,
         9.8763e-02, -1.8849e-01, -8.4463e-02, -4.6245e-01,  2.4824e-02,
        -1.8913e-01, -3.6193e-01, -5.3589e-01, -1.1778e-01, -1.6566e-01,
        -5.3961e-02, -1.5470e-01, -2.8343e-01, -5.8743e-01, -3.2133e-01,
        -3.6077e-01,  1.5110e+00,  2.4776e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5780e-01, -1.2108e+02, -3.6999e-01,  9.1569e-01,  1.3040e+00,
         5.5036e-01, -3.3716e-01,  8.5672e-01,  9.7930e-01,  5.6279e-01,
         9.0832e-01,  2.6056e-01,  3.9556e-01, -6.0172e-01,  4.3859e-01,
        -9.3293e-02, -9.6310e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6534e+00, -1.6635e+02, -4.1817e+00,  4.5161e-01,  9.0711e-01,
         5.9646e-01,  6.6503e-01,  1.0890e-01, -1.7655e+00, -2.1480e+00,
         5.2845e-01,  2.0305e-01,  9.7862e-02, -6.7983e-01, -9.6347e-01,
        -1.1910e+00, -1.8931e+00,  9.5865e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7835e+00,  1.4617e+02, -1.6714e+00,  3.6265e-01,  7.8207e-01,
         3.9883e-01, -6.7825e-01, -1.3484e+00, -1.1727e-01, -7.6774e-01,
         1.5241e-01, -5.6969e-01, -1.1124e-01, -9.8528e-02,  5.2190e-03,
        -3.2156e-01,  9.6795e-01, -1.6432e+00,  3.1208e-02, -5.6056e-01,
        -7.6839e-01, -6.5725e-01,  4.0693e-01,  1.2834e-01,  7.8537e-01,
        -7.2428e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7272e+00, -1.0232e+02,  1.9033e+00, -2.9029e-02, -7.5297e-01,
        -3.9773e-01, -6.0869e-03, -4.5086e-01, -1.5345e+00,  9.2185e-01,
         2.6361e-01,  2.2041e-02,  2.1832e-01,  1.1154e-01,  4.8305e-01,
         1.1512e-01, -3.3294e-01,  5.4575e-02,  1.7549e-01, -1.4835e-01,
         8.5711e-02, -6.2430e-01, -1.4523e-01, -3.9280e-01, -2.1474e-01,
        -1.3671e-01,  2.2778e-02, -8.2806e-02, -1.2901e+00,  6.6293e-03,
        -2.3071e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9962e-02,  1.4933e+00,  3.6573e-02, -1.3394e-03, -2.7883e-02,
        -4.0403e-03,  6.7392e-04,  9.3744e-03,  2.6278e-02, -7.7462e-03,
        -2.8604e-03, -4.3963e-03, -6.1647e-03, -6.1266e-03,  2.3932e-02,
         2.5403e-04,  1.6766e-02,  3.1533e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2955e-02,  4.4924e+00, -2.4340e-01, -4.7591e-03, -5.8712e-04,
         3.9188e-02,  7.7716e-02,  2.1818e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.2034e-01,  1.0489e+01,  3.7225e-02, -9.4861e-02, -1.1097e-02,
        -4.7252e-03, -4.3971e-02, -2.0438e-02,  1.8362e-02, -1.0737e-02,
        -1.2198e-02,  4.0382e-02,  6.1209e-02,  1.3353e-02,  1.4787e-02,
        -8.4876e-03, -1.7641e-02, -4.0282e-03,  3.8984e-02,  1.4789e-02,
         9.9786e-03,  1.7810e-02, -6.2560e-03, -6.9984e-03,  1.9676e-03,
        -1.7462e-02, -1.4787e-02,  1.1458e-02, -4.3539e-04, -1.8940e-04,
         1.7511e-02,  2.7100e-03, -6.5163e-03,  3.8163e-02,  1.1026e-02,
        -1.2906e-02,  1.4394e-02,  1.6735e-02, -7.2052e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2462e-01, -2.8017e+01,  1.2332e+00,  1.9305e-01, -9.3500e-02,
        -1.7398e-02, -9.5990e-02,  8.7557e-02,  1.9220e-01,  6.4154e-02,
         9.3538e-02, -3.0153e-02, -1.0660e-01,  1.2840e-03,  2.8367e-02,
         1.0645e-01, -1.4698e-01,  6.8238e-02, -2.2477e-02, -2.2144e-01,
        -4.2192e-01, -1.4163e-01, -1.4850e-01,  2.6566e-02, -1.9658e-02,
         2.1226e-02,  7.0560e-01, -1.5136e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1351e-03, -1.0200e+01, -1.8398e-01, -1.0261e-01,  2.7818e-02,
        -1.5703e-03, -2.4584e-02,  8.2069e-03,  3.1596e-02,  1.6021e-02,
        -7.9175e-02,  7.2873e-02, -4.6500e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6932e+00, -1.7886e+02, -1.2307e+00, -1.0417e+00,  6.7074e-01,
        -1.7236e+00, -2.0641e-01, -3.2748e+00, -3.7105e-01, -1.8612e-01,
         8.9138e-01,  3.3248e-01, -8.0590e-02,  7.7074e-01,  1.0397e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5272e-01, -9.2384e+00,  2.8107e-01,  9.1562e-02, -5.8214e-02,
        -1.9063e-02, -5.9191e-03,  5.6641e-03, -3.5218e-02, -6.8755e-02,
        -1.2392e-02, -4.6475e-04,  2.6205e-02, -3.6510e-03, -2.6697e-02,
         3.3661e-02, -8.7945e-02,  4.5016e-02,  3.3935e-02,  4.5620e-03,
        -3.1866e-02,  4.7417e-03,  4.5068e-03,  1.3255e-02, -6.1356e-02,
        -1.1200e-02, -2.6177e-02, -4.8957e-02,  2.6610e-02, -1.5604e-02,
        -3.5008e-03,  7.5931e-04,  4.1396e-02,  6.2940e-03, -2.6605e-02,
         9.6622e-03,  9.6101e-03, -1.7110e-02, -1.3575e-02, -1.3052e-02,
        -2.4247e-02,  1.7515e-02, -3.2006e-02, -4.1299e-03, -9.5133e-03,
         3.6309e-02,  6.8147e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1157e-02, -6.5057e+01, -6.1882e-01, -2.4966e-01,  6.4362e-01,
        -1.1741e-01,  8.6853e-02, -2.4589e-01, -3.7536e-01, -1.7162e-01,
        -1.3074e-01, -3.0335e-01, -7.6263e-02, -3.2619e-03, -6.4218e-02,
        -1.6225e-01, -5.4731e-01, -1.0619e-01,  1.6437e-01, -6.1565e-02,
        -6.0396e-03,  2.2562e-02, -2.1659e-01,  9.5000e-02,  1.3574e-02,
        -1.5457e-02, -4.6834e-01, -2.0031e-02, -3.2153e-02, -2.7728e-02,
        -7.4644e-03, -1.7567e-03, -1.3323e-01, -5.5228e-02,  1.1735e-02,
         9.2908e-02,  2.0816e-02,  1.0630e-02, -1.0461e-01, -9.7193e-02,
        -1.8811e-01, -1.0371e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2617e+00, -6.1029e+01,  1.2916e-01,  5.2400e-01, -4.7711e-01,
        -2.0637e-01, -6.5742e-01, -1.0728e-01,  3.8148e-03, -5.3679e-01,
         1.1088e-01, -8.6036e-02,  8.9669e-02, -5.5925e-02, -2.2839e-01,
         9.9002e-02, -3.3833e-01, -1.0531e-01,  1.0616e-01, -2.5658e-01,
         2.3217e-01,  5.4083e-02,  6.2756e-01,  1.4697e-01,  3.6432e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2116e+00,  3.2136e+01,  3.8931e-01, -1.1127e-01, -1.8468e-02,
        -1.3516e-02,  2.4314e-01, -1.6907e-02,  1.0581e-01, -2.3764e-01,
         7.7751e-02, -1.5541e-02, -8.5240e-02, -4.2059e-01,  7.9370e-03,
         2.1334e-01, -7.6348e-02,  4.8987e-02, -1.1517e-01, -3.8077e-02,
        -1.2101e-01,  3.0908e-01, -9.7506e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0754e+00, -7.1104e+01, -3.8959e-01, -1.7851e-01,  1.8634e-01,
        -1.9255e-01, -3.6271e-02, -2.2973e-01, -4.6238e-03, -3.8368e-01,
        -5.2004e-02,  3.8939e-01, -4.7295e-02,  2.9036e-02,  1.1399e-01,
         1.1664e-01, -7.1337e-02,  2.0740e-01,  3.6683e-01,  3.3962e-01,
         1.9634e-01,  2.2029e-01,  1.2446e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9016e-01,  4.0242e+00,  3.1476e-02, -2.1382e-02, -4.7175e-02,
         4.6764e-03,  2.0144e-02,  4.2882e-02,  5.5042e-02,  1.0353e-02,
        -2.8798e-02,  4.9044e-02, -3.2188e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3178e+00,  2.3204e+02,  7.3536e-01,  1.1767e+00,  8.8474e-01,
        -4.4359e-01, -8.5178e-01,  9.0161e-01, -4.5537e-01,  1.0391e+00,
        -4.1619e-01, -1.0947e-01, -1.8635e+00, -2.9604e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7293e+00, -9.2166e+01, -2.2615e+00,  4.4460e-01,  2.2220e-01,
        -1.5381e-01, -3.3893e-01,  2.6094e-01, -8.8144e-02,  4.5310e-02,
         3.9882e-02, -1.0227e-01, -4.1185e-02, -5.2830e-01,  7.7960e-02,
        -1.7837e-01,  2.5189e-01, -2.6643e-01, -5.3591e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.1748e+00,  1.2264e+02,  1.0260e-01,  7.2024e-02,  2.6214e-01,
         4.8811e-01,  9.6946e-02, -5.2597e-01, -4.7635e-01,  4.5160e-01,
         2.4767e-01, -2.8232e-01,  1.8094e-01,  3.3928e-01,  5.0935e-01,
        -5.8247e-01,  5.0348e-01,  7.4547e-01,  3.1629e-01,  3.1862e-01,
        -6.9658e-02,  1.3300e-02, -9.9172e-02,  4.3577e-01,  3.4680e-01,
         4.3810e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0355e-02,  9.8805e-01,  1.1486e-02, -4.2644e-03,  1.2668e-03,
        -1.0834e-02, -4.0525e-03, -1.2858e-02, -6.9654e-03,  2.3439e-02,
         8.9142e-05, -2.9107e-03, -3.1891e-03,  7.5953e-03,  1.8723e-03,
         3.0471e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  2.1501, 101.9042,   2.8706,   2.4127,  -0.3407,   0.6230,   0.9492,
         -0.4141,   0.2069,   0.8080,   0.8662,   0.1315,   0.2477,  -0.2515,
          1.8129,  -0.2382,   0.9195,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7609e+00,  1.9045e+02,  2.7723e+00, -2.3306e-01, -1.0090e+00,
         1.6131e+00, -5.5013e-02,  1.8646e-01, -2.1243e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0432e-01, -2.2918e+01, -9.4716e-02, -3.7089e-01,  2.6018e-01,
        -1.1700e-01,  1.5717e-02,  6.0744e-02,  4.2075e-02,  2.9180e-02,
        -1.2455e-03, -1.8667e-02, -6.6786e-02, -3.6615e-02, -9.4082e-02,
         3.5345e-02,  6.1053e-03, -1.0227e-01, -8.0098e-02, -7.5180e-02,
        -6.8867e-02, -1.2230e-02, -6.7806e-02, -1.3155e-02,  1.1410e-01,
         1.9696e-02,  2.3777e-01, -1.3765e-02, -2.0743e-02, -7.9280e-03,
        -1.5833e-02,  1.8065e-02, -2.8126e-02,  5.5084e-02, -1.6123e-02,
         6.4268e-02, -9.9837e-03,  1.5224e-03,  1.4247e-01,  6.2637e-02,
         4.0683e-02,  1.1928e-01,  1.1717e-01,  1.1996e-01, -1.4473e-01,
         4.7472e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4727e-02,  7.2521e+00,  1.2200e-01, -7.9224e-02,  9.9185e-03,
         6.3459e-03,  3.8707e-03, -4.2417e-02, -2.1993e-02, -1.4123e-02,
        -1.2049e-02, -5.2128e-04, -6.8279e-02, -2.8288e-02,  2.5566e-02,
        -1.3120e-03,  1.2786e-02, -2.1926e-02,  2.2966e-02,  2.1396e-02,
        -1.3134e-02,  1.7228e-02,  2.4111e-02,  7.1589e-03,  1.4969e-02,
        -4.8308e-02,  9.1114e-03, -7.8064e-04, -9.7452e-03, -4.5398e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  1.1982, 237.7685,  -0.9231,   3.4913,  -0.3298,   2.0933,  -3.5512,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   0.9762, -227.3586,    0.2959,   -2.9416,   -2.0576,   -0.9123,
          -0.9346,   -0.9799,    1.4545,   -0.6248,    0.4581,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6186e+00, -3.1319e+01, -3.6874e-01, -7.4684e-01, -4.6516e-01,
        -2.2685e-01, -6.9556e-02,  7.2157e-03, -1.8293e-01, -4.4428e-02,
        -1.0791e-01, -3.6470e-01, -1.2777e+00,  2.3617e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1878e-01, -3.3072e+01,  1.4185e-01, -3.1124e-03,  4.2465e-02,
         8.2013e-02,  1.1602e-01,  1.8496e-02, -5.7321e-02,  2.4509e-02,
         5.9052e-02, -1.7518e-01, -3.2872e-02,  2.4451e-01, -4.2803e-02,
        -4.6366e-02, -6.0091e-02,  2.3858e-02,  3.6794e-02, -2.9311e-01,
        -4.0537e-01, -8.9358e-02, -6.0656e-02,  3.7690e-02,  8.8747e-02,
        -2.7429e-02, -1.0081e-01, -8.3853e-03,  3.6125e-02,  4.9725e-02,
        -1.3026e-01,  8.1558e-02,  8.6338e-02, -2.7743e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9384e-03,  2.8953e+01,  6.0724e-01,  5.4512e-01, -1.7107e-02,
         8.6060e-02,  4.7674e-02, -3.7194e-01,  1.3621e-01, -2.5043e-01,
        -1.0422e-01,  3.4267e-02,  1.7470e-02,  5.7275e-02,  3.9330e-01,
        -2.4073e-01, -3.4134e-01, -2.0528e-01, -9.5570e-02, -1.2873e-01,
         1.3664e-01, -8.2923e-03, -1.5563e-01, -7.8516e-03,  2.8682e-03,
         6.4351e-02, -1.9327e-01,  4.1786e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8468e+00, -2.2081e+02, -5.9084e+00,  3.0903e+00, -1.8734e+00,
        -2.2927e-01,  5.9083e-01,  9.1282e-02, -2.1516e-01, -2.6539e-01,
        -4.1464e-02,  1.0786e+00, -7.7894e-01, -1.1482e+00, -4.7424e-01,
         3.7494e-01, -3.2523e+00,  2.5984e+00, -1.0039e+00, -5.0677e-01,
         1.1435e-01, -5.3991e-01,  2.6111e-01, -6.7925e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-4.6250e-01,  2.3434e+02, -2.0842e+00,  4.2455e-02, -6.6153e-01,
        -1.0787e+00,  2.0100e+00, -7.9186e-01,  8.1373e-01,  2.5316e-01,
        -1.4424e+00,  4.7434e-01,  4.0628e-01,  7.4768e-01, -8.9053e-02,
        -1.4208e-01,  8.9060e-01, -4.3289e-01,  4.7243e-01, -5.3760e-01,
        -4.9905e-02, -4.5034e-01, -6.5547e-01,  5.3938e-01,  4.5979e-01,
         2.2376e+00, -8.8894e-01, -5.2894e-01, -1.9338e-01, -6.5966e-01,
         6.2583e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   0.9604, -119.0885,    3.9089,    1.7034,   -0.3622,    1.2007,
           0.5253,    0.3109,   -2.4768,    0.1572,    1.2262,   -0.2106,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6442e+00, -6.7562e+01,  3.3168e-01, -1.2142e+00, -4.8428e-02,
        -5.3250e-01,  9.5733e-02,  2.7066e-01,  3.8502e-01,  5.0571e-02,
         6.8881e-01,  9.7280e-01, -6.3487e-01,  3.1039e-01,  3.4761e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5782e-02, -8.7304e+00,  6.8478e-02, -9.1494e-02,  7.6838e-02,
         3.8075e-02, -8.2615e-02,  1.7958e-02, -1.5972e-02, -2.1537e-02,
        -2.0110e-02,  1.0419e-02, -8.3587e-03,  9.8958e-03, -1.1065e-02,
        -6.7153e-03,  2.9219e-02,  1.1980e-02,  1.4515e-02,  3.9782e-02,
         2.0095e-04, -1.7786e-02,  1.6493e-02, -2.1611e-04, -2.7635e-02,
        -2.0710e-02, -2.9363e-03,  1.0141e-02,  2.6980e-03, -4.9961e-03,
        -1.7854e-02, -1.7674e-02,  8.5249e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3111e+00, -1.0769e+02, -1.3421e+00,  7.4333e-01, -2.0918e-01,
        -7.0065e-02,  2.5713e-01,  1.1237e-01,  1.0453e+00,  5.7128e-02,
         8.3277e-01, -2.3344e-01,  1.8764e-01,  3.9344e-01,  2.7978e-01,
         6.4514e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2262e-01,  1.2045e+01, -1.5261e-02, -5.7699e-02,  6.2058e-02,
         1.1102e-02,  8.8229e-03, -1.8836e-02, -2.0417e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5971, 88.4868, -0.7249,  0.9994,  0.4983,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0868e-01,  4.9963e+01, -6.0061e-01,  1.5069e-02, -2.3225e-01,
        -1.7333e-01, -4.2581e-02, -2.2305e-01, -6.1127e-02,  1.9748e-02,
        -1.4357e-01, -9.1323e-03, -4.7884e-01, -1.1549e-01, -8.8505e-02,
        -2.1207e-02, -1.3864e-02,  1.5302e-01, -2.1364e-02, -1.4185e-02,
        -1.4133e-01,  3.4728e-02,  4.8140e-02, -2.0595e-01,  6.4234e-02,
         1.4851e-01,  1.7400e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  -4.2639, -151.7493,   -3.2415,    0.3830,   -1.3111,   -2.5297,
          -0.3495,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6759e+00,  2.5968e+02,  1.9432e+00,  2.2659e+00,  1.3558e+00,
        -3.8777e-02, -2.0114e+00,  4.5298e-01, -3.9193e-01,  1.0519e+00,
        -2.4996e-01,  5.5457e-01,  6.6276e-01, -8.7750e-01, -3.2638e-01,
         4.2716e-01,  1.1872e+00,  1.0544e+00, -6.3601e-01,  2.6975e+00,
         7.4354e-01, -1.2264e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4661e-01,  1.5887e+02,  2.8985e+00,  5.6599e-01,  1.5355e+00,
         1.6923e-01, -3.4363e-02,  1.4189e+00, -2.3423e-01, -8.0419e-01,
        -1.1943e+00, -2.2698e-01, -8.5085e-01,  1.2444e+00, -9.1816e-01,
        -6.4202e-01, -5.6046e-01,  1.4047e+00, -1.8995e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5367e+00, -1.7510e+02, -4.2755e-01,  5.7059e-01, -6.8687e-01,
         3.6921e-01, -1.7423e-02,  4.8773e-01, -5.8506e-01, -1.3126e-01,
        -6.8006e-01, -5.1155e-02,  1.7453e-01, -1.8356e-01, -4.0069e-01,
        -5.0531e-02,  1.7255e-01, -1.2971e-01, -1.1644e-01, -2.8248e-01,
        -2.0387e-01,  1.0729e-01, -3.3983e-02,  1.2867e-01,  1.9874e-01,
        -1.8731e-01, -8.8054e-02,  2.0880e-01, -3.3041e-01,  5.5753e-01,
         8.6204e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.8232e-01,  1.8798e+01, -7.5083e-01, -7.9366e-02, -1.3575e-02,
        -6.6600e-02,  6.4870e-02,  4.9609e-03, -1.0403e-01, -2.8096e-02,
         3.2478e-04,  5.8742e-02, -6.2955e-02,  3.0168e-01, -1.9579e-04,
         7.2402e-02,  1.8337e-01,  1.0085e-03,  1.4146e-01, -4.0290e-02,
         1.2491e-01,  4.2177e-02,  1.4917e-01,  1.8709e-01, -1.7049e-01,
         4.1443e-02, -1.0811e-04, -4.7757e-01,  1.2906e-01,  9.1208e-02,
         8.8188e-02,  5.8485e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7572e-01, -1.7918e+02, -9.2595e-01,  6.0223e-01,  2.8030e-02,
         4.3376e-02,  1.2669e-01,  4.6821e-01, -1.4701e+00, -2.3595e-01,
        -3.4224e-01, -3.0411e-01, -7.9755e-01,  9.5410e-02, -4.4834e-01,
        -3.5856e-01, -4.9740e-01,  1.9824e-02,  4.3778e-01, -1.0183e-01,
         2.2956e-01, -1.0353e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4249e+00, -1.7747e+02, -4.5039e+00,  1.3424e+00,  4.7673e-01,
        -1.3963e+00, -7.0925e-01, -6.7050e-02,  9.3593e-01,  6.0539e-01,
        -6.0022e-02,  3.4329e-01, -3.9407e-01, -2.7793e-01,  1.5461e+00,
         2.1657e-01, -8.1972e-02, -2.6950e+00,  1.2868e-01,  5.6859e-01,
         1.7603e-01,  8.4851e-01,  3.2042e-01,  5.8752e-01,  5.5593e-01,
        -9.4193e-01,  6.2548e-01,  1.7594e+00, -6.5899e-02, -4.2714e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3120e-01,  4.1127e+01, -7.1756e-01, -3.6178e-01,  9.8923e-03,
        -3.0482e-01,  2.1149e-01,  1.0939e-01, -1.9580e-01,  6.4658e-02,
        -1.2038e-01,  1.3035e-01, -3.8264e-02, -2.1237e-02, -8.2234e-03,
         1.5418e-01, -2.5086e-01, -8.6959e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1483e-01, -1.0152e+02, -3.4374e+00, -2.9929e-01,  3.2357e-01,
         1.1963e+00,  1.0199e-01, -1.6597e-01,  4.5524e-01, -7.6566e-02,
        -5.5595e-03,  3.4919e-02, -7.9622e-02, -4.5446e-01, -3.4079e-02,
         3.6039e-01, -6.0925e-01, -1.1502e-01,  5.9203e-01,  1.4789e-01,
         1.9242e-02,  9.2125e-02, -2.6090e-01, -7.0481e-01, -2.5437e-02,
        -1.4144e-02,  3.7371e-01, -7.0941e-02, -2.1231e-01, -1.1259e-01,
        -7.4948e-02, -1.2295e-01, -5.5423e-01,  2.8436e-02,  3.9342e-02,
         3.4961e-01, -7.5578e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7056e-03,  1.2821e+00, -2.8630e-02,  1.7372e-02, -1.0021e-02,
         3.9755e-03,  3.1211e-03, -4.7274e-03,  2.2992e-03,  3.9057e-03,
        -7.0677e-05,  3.6773e-03, -5.6122e-03,  7.2486e-03,  6.4766e-03,
         1.5979e-03,  8.5410e-03,  1.1236e-03,  7.9694e-04,  7.3354e-04,
         9.0462e-03,  1.4974e-03,  1.7311e-02,  1.1075e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0281e-02,  1.1953e+01, -1.1054e-01, -3.2051e-02, -3.3661e-03,
         5.4884e-02, -2.5135e-02, -3.1169e-02,  4.8614e-02, -7.6475e-03,
        -7.4015e-03, -5.0979e-03,  2.9412e-02, -1.7021e-02,  2.4187e-02,
         9.1482e-03,  4.8583e-03,  5.3276e-03, -2.0163e-03,  7.5635e-03,
         7.9775e-03, -7.4862e-02, -5.5474e-03, -4.8421e-03, -1.3569e-02,
         6.7130e-02, -1.3584e-02, -1.9915e-03, -1.6521e-02, -1.7847e-02,
         2.6926e-02, -3.3517e-02,  4.8203e-03,  1.6946e-03,  4.8944e-02,
        -3.6221e-02,  8.4885e-03,  1.1954e-02,  6.8422e-03, -6.9982e-03,
        -4.3028e-03, -2.1046e-03,  9.8657e-03,  5.9516e-03,  1.4882e-02,
         9.3767e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3583e-02,  1.0964e+01, -4.8939e-01, -1.0074e-01,  6.3204e-02,
         1.8904e-02, -1.2270e-01, -2.3537e-02,  6.8801e-03, -8.3070e-02,
         4.0388e-02,  5.3779e-02, -2.3519e-02, -8.7094e-03, -7.8630e-03,
         2.5433e-02, -4.1450e-02, -1.8426e-01,  1.8489e-02,  2.5173e-02,
         6.1140e-02, -1.2672e-02,  1.1454e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8802e-01,  2.3115e+01,  1.8072e-01, -1.1784e-01, -6.6511e-02,
        -6.6393e-02, -4.3957e-03,  4.1137e-02,  7.4886e-02, -2.8345e-02,
         2.0793e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2901,  6.4548,  0.0623, -0.0219, -0.0078, -0.0419,  0.0245, -0.0568,
        -0.0074,  0.0171,  0.0283, -0.0169,  0.0244, -0.0454,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7253, 18.1774,  0.0586, -0.0349, -0.0992, -0.0224, -0.0195, -0.1391,
        -0.1206, -0.6493,  0.2348, -0.1076,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8356e+00,  1.1381e+02, -1.2936e+00,  1.2092e+00, -1.9142e-01,
         4.7057e-01, -7.9921e-01,  1.1282e-01,  4.8556e-01,  5.1705e-01,
        -4.1950e-01,  1.9765e-01, -1.3943e-02,  1.0033e-01, -2.2830e-01,
        -1.1485e-01, -1.1608e-01, -3.3347e-01,  2.8847e-01, -2.1251e-01,
        -1.3842e-01,  5.7467e-01,  3.2181e-01, -3.2091e-01, -6.4623e-02,
        -6.6829e-01,  2.6348e+00, -1.6083e-01, -1.0309e-01,  8.0199e-01,
        -6.4282e-02,  8.1601e-01,  8.7145e-02,  5.6914e-01,  2.7252e-01,
        -1.6886e-01,  7.9385e-02,  8.1507e-02, -4.0528e-01, -2.7044e-02,
         7.9384e-02,  1.4867e-01, -7.7300e-02,  1.2192e-01,  1.3048e-01,
         3.3249e-01,  1.1656e-01, -1.8890e-02, -1.8034e-01, -8.0687e-02,
        -2.7855e-02,  6.5386e-02, -3.3090e-02,  3.6374e-02,  1.1354e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.7382e+00,  1.8228e+02,  1.4612e+00,  2.8013e+00,  3.1755e-01,
         2.2530e-01, -1.0759e+00, -3.3905e-01,  3.8723e-01,  1.2841e+00,
        -1.5314e+00, -3.0430e-01,  1.3845e-01,  6.0944e-01,  1.8713e-01,
         4.8151e-01,  2.2854e-01, -1.5468e+00,  6.0151e-01,  6.0390e-01,
         4.9831e-01, -4.7858e-01, -8.8535e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1748e-01, -1.5981e+01,  1.2405e-01, -2.5243e-02, -1.8473e-01,
        -3.2502e-02,  2.2057e-02, -4.8823e-02, -7.2118e-02, -1.1409e-02,
         6.0539e-02,  9.7698e-02,  1.4591e-02, -1.7097e-01, -3.9408e-02,
         2.1380e-02,  1.0317e-01,  5.0291e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6629e+00, -1.0362e+02, -3.5535e-01, -5.6114e-01, -1.9451e+00,
         8.5179e-02, -2.6763e+00, -6.4287e-01,  7.1388e-01,  7.6784e-01,
        -1.3967e+00, -3.7059e-01, -3.5874e-02,  7.6084e-03, -9.8766e-01,
         2.7476e-01,  2.3763e-01, -1.3502e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4455e+00, -1.3401e+02,  3.3412e+00,  1.3230e+00, -3.5980e-01,
        -1.5901e+00, -8.0771e-01, -2.4637e-02, -9.8671e-03,  4.8081e-01,
        -4.1106e-02, -1.8219e+00, -7.1065e-01, -1.6250e+00,  6.4586e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4634e+00, -7.0857e+01,  1.9777e+00, -1.1322e-01,  1.8625e-01,
         3.2670e-01,  2.1136e-01, -9.8165e-03, -9.7912e-02, -3.6073e-01,
        -2.2138e-02,  2.2832e-01, -3.8463e-01, -1.8148e-01,  1.2725e-01,
        -3.9094e-01,  2.5545e-01,  6.3548e-02, -1.0118e-01, -1.4016e-01,
        -7.4489e-01,  9.3540e-02,  1.4121e-01,  3.2443e-02, -1.2802e-01,
         1.4341e-01, -2.8027e-01,  1.6662e-02, -1.6592e-01, -6.9344e-01,
         3.1820e-01, -1.9554e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8423e+00,  2.1216e+02,  1.1109e+00,  8.7856e-01,  1.1618e+00,
        -3.5665e-01, -1.4349e+00,  4.5204e-03, -1.3694e+00,  7.2814e-01,
         1.4093e+00, -6.0715e-01,  4.3849e-01,  8.4442e-01, -7.7523e-02,
        -1.0755e-01,  6.1779e-02, -1.2402e+00, -1.7721e-01,  2.0670e-01,
        -1.2604e-01,  4.4608e-01, -2.5510e-01,  2.2588e-01,  4.5054e-02,
         3.7358e-01, -3.3280e-01, -3.5671e-01, -3.5999e-01, -9.8479e-02,
        -5.1537e-01,  7.0375e-01, -9.8449e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3502e-01, -1.1241e+02,  2.5028e-01,  7.4853e-01,  1.0058e-01,
         2.3325e-01, -3.9864e-01,  1.3114e-01, -2.3321e-01, -3.9445e-01,
         3.1403e-02, -3.2006e-01,  7.8814e-02,  2.2519e-01, -1.1237e-01,
        -1.9708e-01, -1.1895e-01,  1.7760e-01, -7.6142e-02, -3.5448e-02,
         9.8118e-02, -8.6655e-02,  1.3036e-01,  1.9618e-01, -1.7069e-01,
        -1.0269e-01,  5.8056e-04, -1.6842e-01,  2.6556e-02,  2.0674e-01,
        -4.7342e-02, -2.7299e-01, -2.4058e-01, -4.4209e-02, -1.3786e-01,
        -4.8633e-01,  1.3431e-01, -2.1502e-01, -6.4019e-02, -3.6983e-02,
        -6.6257e-02,  8.5259e-02, -8.9842e-02,  1.3826e-01, -3.1042e-02,
        -7.9144e-02, -9.0191e-03, -9.2505e-02, -2.4354e-01,  6.8976e-02,
        -4.3165e-01, -1.2015e-01,  1.7088e-01, -4.9273e-02, -2.7058e-01,
        -4.6762e-02,  5.7335e-02,  3.1152e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2782e-01,  5.3029e+01, -1.7027e-01, -5.4961e-01, -1.9734e-01,
         9.7098e-02,  1.6300e-01,  1.4120e-02,  3.0856e-01,  1.1116e-02,
        -8.8426e-02,  2.8402e-01, -9.2564e-02, -1.5615e-01,  1.5960e-01,
        -2.0468e-02,  1.7741e-01,  5.6520e-02, -5.0866e-02,  4.5619e-02,
         5.1351e-02, -1.1974e-02,  6.7291e-02, -7.8230e-02, -2.2986e-01,
         4.0628e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2549e-01,  1.1941e+01, -2.2270e-01, -4.3931e-02, -6.8976e-03,
         4.3672e-02, -1.9854e-02, -3.8870e-02,  3.2361e-02, -3.0494e-02,
         3.0992e-02, -1.7559e-02, -3.3611e-02, -5.0958e-02, -2.6746e-02,
        -8.0446e-03, -3.3364e-03,  3.5796e-02, -8.3458e-03, -3.8030e-02,
        -7.0325e-03, -1.8453e-02,  1.5749e-02, -4.2625e-02, -1.4407e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4606e+00, -1.6328e+02, -7.9759e-01, -4.6615e-01, -1.0746e-01,
        -1.9182e-01,  4.3451e-01,  1.5337e-01, -2.0563e-01,  2.8703e-01,
        -6.6009e-01, -2.4601e-01,  5.5486e-01, -1.0629e+00, -5.7258e-01,
        -3.7023e-01,  5.4037e-02, -8.3132e-01, -5.0816e-01,  9.6912e-02,
         2.3520e-01, -3.0064e-01, -2.9652e-01, -3.4574e-01, -3.0435e-01,
         1.9517e-01,  5.8206e-02, -5.7853e-01, -9.1848e-02,  3.0302e-01,
         2.1815e-01,  7.2727e-01,  6.9704e-01, -1.2732e-01,  9.2063e-01,
         6.3135e-01, -2.2195e-01, -2.1095e-01,  1.2421e-01, -1.7204e-01,
         3.4639e-01, -3.9436e-01, -5.5446e-01, -2.0383e-01,  1.7182e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5977e-02,  1.5857e+00, -2.7885e-02,  1.1190e-02, -6.4211e-03,
         2.7848e-03, -4.6374e-03,  3.7652e-04,  2.7931e-03,  3.0509e-04,
         1.6022e-03,  1.0458e-02, -2.2591e-04,  8.4893e-03,  6.2992e-03,
        -1.0562e-02,  1.7435e-02,  5.1052e-03,  6.1364e-04,  5.8848e-03,
         2.3422e-03,  3.8276e-03,  2.4196e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9390e-02,  2.1404e+00,  1.2055e-02,  4.9983e-05,  3.6475e-03,
         2.5590e-02, -4.3828e-03, -4.0443e-03, -4.1504e-03, -2.3951e-02,
        -4.3517e-02,  1.9256e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-2.6865e+00, -9.8578e+01, -2.4089e-01,  8.8451e-01, -8.4010e-01,
         1.1353e-01,  8.8326e-01,  2.5118e-01, -9.2319e-02,  4.4761e-01,
         8.7972e-02,  4.3291e-02,  2.6021e-01, -5.0479e-01,  1.3133e-01,
         2.9620e-01, -2.3184e-02, -2.8642e-01, -1.2196e-01,  1.1704e+00,
         1.8198e-01,  3.5887e-01,  4.1879e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8925e-01,  9.4285e+00, -6.0082e-02,  1.8349e-02, -5.4767e-02,
        -2.0008e-02,  9.1486e-03,  1.6474e-02,  1.9906e-02,  1.7632e-02,
         1.4494e-02, -9.6806e-03, -6.3086e-03,  2.3321e-02,  1.9984e-02,
        -6.5751e-03, -2.4720e-02,  3.5311e-02,  2.4692e-03,  1.8347e-02,
        -1.6501e-02, -1.3848e-02, -3.7686e-02, -2.0773e-02, -6.3200e-03,
        -1.8552e-02, -1.4616e-02, -9.4160e-03, -3.9166e-02,  1.7045e-02,
        -3.4539e-03, -8.1671e-02,  4.1947e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3544e+00, -1.9843e+02,  1.3765e-01,  2.3823e+00, -1.3206e+00,
        -2.9530e-01, -3.6723e+00,  1.2322e+00, -8.7473e-02, -1.8120e+00,
         3.4552e-01,  6.0820e-01, -3.5720e+00,  4.2144e-01, -1.4826e+00,
         1.0748e+00, -2.7167e-01,  3.6585e+00,  2.1759e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9900e-03,  5.0820e-01,  2.6079e-04,  1.4948e-03, -7.3533e-04,
        -4.4248e-03,  7.1561e-03, -1.4394e-03, -4.6201e-04,  4.9444e-03,
         2.7937e-04,  3.4151e-04,  4.1372e-04,  9.8642e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3869e-01, -4.7719e+01, -5.1753e-01, -2.4794e-01,  9.5267e-02,
        -2.8429e-01,  1.5736e-01, -1.5096e-02,  1.4484e-01,  4.8545e-02,
        -8.1535e-02,  9.0003e-02,  5.2608e-02, -5.4642e-01, -9.8883e-02,
        -1.1333e-02, -1.9128e-01, -6.8414e-02, -9.3617e-02, -8.3864e-02,
         5.1387e-02, -6.6917e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1268e-01,  3.5033e+01,  4.5216e-01, -2.2363e-01, -9.8059e-02,
        -9.2662e-02, -1.5892e-01, -5.0693e-01,  6.6118e-03,  7.6339e-02,
        -1.4545e-01, -8.4901e-02, -1.5603e-02,  3.1818e-02,  9.4158e-02,
        -1.8476e-02,  2.6008e-02, -2.9980e-02, -1.0808e-01, -3.7892e-02,
        -8.3228e-02, -6.7465e-02, -9.4049e-02, -4.1204e-02, -1.7240e-01,
         4.0673e-02, -2.1148e-02, -7.4141e-02, -1.4875e-01,  2.7156e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3490e-02,  6.7825e+00, -6.3962e-02, -1.2759e-01, -3.8486e-02,
         2.9843e-02, -1.1628e-02, -3.3833e-03, -3.0842e-02,  1.5721e-02,
        -7.1098e-02,  2.4333e-02,  2.2645e-03, -4.3650e-03, -4.0368e-02,
        -7.4572e-02, -2.9889e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4224e-01,  3.7751e+01,  4.6503e-01,  1.0101e+00, -1.5326e-01,
         2.9772e-01,  3.6226e-02,  4.1083e-01,  1.8976e-01, -4.4079e-01,
        -4.0210e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0241, -3.3355, -0.0292, -0.0079, -0.0195,  0.0044, -0.0084, -0.0035,
         0.0079, -0.0036,  0.0219, -0.0067, -0.0047, -0.0617,  0.0177,  0.0091,
         0.0354,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2857e-01,  2.3619e+02,  5.2630e-01,  2.7765e+00,  2.1672e-02,
         9.4451e-01,  6.3637e-01,  2.4423e-02,  1.0074e+00,  2.7009e-01,
        -9.6022e-01,  6.3464e-01,  5.6154e-01, -2.1082e-01,  2.4749e+00,
         5.6331e-01,  8.3368e-01,  2.0448e-01, -1.5557e+00,  4.2755e-01,
         1.6914e-01,  5.2768e-01,  2.3988e+00,  3.9487e-01, -7.2659e-01,
         1.9997e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9534e+00,  9.6423e+01,  1.2124e+00,  2.1318e-01, -2.9676e-01,
        -2.5765e-01, -2.2676e-01,  1.9236e-01, -3.1717e-01, -1.6153e-01,
        -6.0671e-01, -1.3031e-01, -2.4907e-01,  3.8509e-02, -2.2438e-01,
         4.8700e-02,  1.9120e-01,  2.3570e-01, -2.7065e-02, -3.1536e-01,
        -4.1872e-01,  1.8860e-02, -2.3212e-01, -3.6373e-02, -1.0093e-02,
        -1.0867e-02,  7.3750e-02, -1.9759e-01,  2.4089e-01,  6.6048e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5752e-01,  8.0676e+00, -7.6260e-03, -1.0039e-01,  1.1276e-02,
        -2.2725e-02, -1.0019e-01, -2.3694e-02, -1.4204e-02, -3.8001e-03,
         3.9444e-03, -2.8325e-02, -4.3992e-02, -4.3965e-02,  9.6470e-03,
         6.4065e-03,  4.3296e-03,  4.0277e-02,  2.9955e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 5.1601e-03,  4.9063e-01,  6.0069e-03,  2.0748e-03,  1.4222e-03,
         1.7359e-03,  1.4154e-02,  5.6640e-04,  1.8338e-03,  3.7170e-03,
        -2.1833e-04,  3.0479e-04, -4.5432e-04,  2.5932e-03,  1.6036e-03,
         7.2003e-04,  4.3449e-03,  3.8662e-03,  2.5569e-03,  4.0720e-03,
         7.6479e-04, -7.0653e-04, -1.0839e-03, -5.2139e-04, -2.6862e-03,
         2.1072e-03,  1.5922e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0017,  0.1184,  0.0004,  0.0006, -0.0005,  0.0004, -0.0003, -0.0006,
        -0.0001,  0.0004,  0.0007,  0.0007, -0.0004,  0.0007,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8783e-03,  1.6979e-01,  1.4970e-03,  7.5089e-04, -4.1447e-04,
        -8.5209e-04,  7.3475e-04, -8.4376e-05, -1.7278e-03,  9.1024e-05,
        -9.1414e-04, -2.6637e-04, -7.7750e-04,  4.6147e-04, -7.1330e-05,
        -1.3024e-03,  1.4062e-04, -8.6827e-04, -9.6428e-05, -9.1390e-05,
        -4.1836e-04,  2.5465e-05,  7.8533e-04,  3.1421e-04, -4.1617e-04,
        -5.0471e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0342e-03,  1.4251e-01, -2.3132e-03, -1.2612e-03, -1.3969e-03,
        -1.3078e-03,  4.0875e-05, -4.1017e-04, -1.1690e-03, -1.1934e-03,
        -7.8202e-04, -1.3015e-03, -2.0196e-03, -9.8155e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7247e-04,  1.8122e-01, -3.3662e-04,  2.3632e-03, -6.8629e-04,
         6.3197e-04,  1.2643e-04,  2.0401e-04,  1.7487e-04,  1.3048e-04,
         4.8257e-04, -5.1673e-04,  7.3468e-04,  2.2290e-04,  7.3625e-04,
         3.5364e-04,  3.9283e-04, -6.3103e-07,  2.9363e-04, -3.8877e-04,
        -5.2218e-04,  1.3164e-03,  6.8612e-04, -1.8583e-04, -1.3180e-04,
         3.1124e-04, -7.4320e-04, -3.7192e-04, -9.1341e-04,  4.3235e-04,
         3.4380e-04, -1.9897e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6709e-03,  1.0853e-01,  2.4282e-03,  1.3175e-04, -1.1270e-03,
        -2.0932e-05,  4.6412e-04, -2.1459e-03,  3.9167e-04, -9.3181e-05,
        -2.4242e-04,  2.5601e-04,  1.1235e-04, -5.6703e-05,  9.4792e-05,
        -7.4214e-05,  3.9517e-04, -7.4725e-04, -4.5223e-04,  3.3896e-05,
         3.6671e-05,  4.2863e-04,  3.2415e-04, -1.0010e-03, -7.8844e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0082,  0.3251, -0.0054, -0.0042, -0.0051, -0.0041,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3069e-02,  5.7238e-01,  4.1003e-03,  3.8058e-03,  6.5598e-03,
         2.0433e-03,  1.9486e-03, -6.9633e-03, -1.3306e-03,  1.2467e-04,
         4.9651e-04,  2.3432e-03, -7.2648e-04, -1.4394e-03,  1.5706e-03,
         1.2895e-03,  2.0968e-04,  2.4299e-04,  2.8316e-04,  1.4467e-03,
        -1.8753e-03,  2.2203e-04,  3.4928e-03, -1.5207e-03, -4.6485e-04,
        -2.0033e-03, -6.8906e-04,  3.0176e-03,  3.3051e-04,  2.6701e-03,
         2.6613e-03,  4.6856e-04,  1.0725e-04,  3.6762e-04,  8.8274e-04,
         1.0071e-02, -8.0558e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2742e-01,  5.2108e+00, -8.4205e-02,  1.1519e-01, -5.3553e-03,
         7.5308e-02, -8.4191e-03,  2.6271e-02, -1.0391e-02, -1.1245e-02,
        -6.0484e-03,  1.8915e-02,  2.3757e-03,  6.7278e-03,  1.5654e-02,
        -1.6178e-02, -1.1637e-02,  2.5896e-02,  1.0872e-02,  1.7547e-02,
        -9.7922e-02, -9.9538e-03,  2.7741e-04, -1.0394e-02, -1.1281e-02,
        -1.4504e-02, -5.7906e-03,  3.3672e-02, -4.3852e-03,  1.6904e-03,
        -4.1944e-03,  4.5860e-03,  4.2060e-03,  3.0561e-03,  1.5030e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0074,  0.5669, -0.0659,  0.0056, -0.0014,  0.0011, -0.0087, -0.0065,
         0.0046,  0.0047,  0.0009, -0.0009, -0.0031, -0.0085,  0.0042,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5403e-02,  5.2100e-01, -2.8450e-03,  1.0730e-03,  5.1250e-04,
         2.2202e-03, -3.8330e-04,  1.9766e-03,  1.4092e-03,  2.0868e-03,
        -1.9572e-04,  4.4189e-04,  8.0691e-04,  8.3679e-04,  1.4550e-05,
        -3.1223e-04,  2.2489e-04,  3.1722e-03,  3.7199e-03,  3.2973e-03,
         1.2238e-03,  2.3521e-03,  9.5034e-04,  5.6320e-04,  6.9085e-04,
         4.5402e-04, -4.8545e-04,  7.7143e-05, -6.0375e-04, -4.6591e-04,
         7.0859e-04, -2.2829e-04,  3.9520e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3099e-01,  1.4071e+01, -3.0594e-01, -1.1312e-02, -2.0182e-01,
        -1.0143e-02, -5.7998e-03,  2.7689e-02, -1.6825e-01,  8.7126e-02,
         4.2529e-02, -5.7033e-02,  6.9938e-02,  5.2139e-02,  3.8572e-02,
        -2.3353e-02,  5.0699e-02,  1.7619e-02,  4.0216e-02, -3.0501e-02,
        -2.9749e-02, -8.2902e-03, -7.1352e-02,  1.3313e-02,  5.0109e-02,
        -7.8166e-03, -3.5423e-02, -5.8427e-02, -2.9420e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-2.2157e-03,  1.0526e-01,  2.5338e-03, -1.1010e-03, -1.1285e-04,
        -8.3256e-04, -8.2639e-05,  7.7022e-04, -3.5031e-04, -3.2280e-04,
         7.4185e-05, -4.5029e-05,  1.8930e-04, -4.4883e-04,  2.9921e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4910e-04,  2.3874e-01, -3.6229e-03, -2.6435e-04, -6.0589e-04,
         3.2716e-04,  8.2954e-04,  1.7055e-04,  1.1320e-03, -2.9323e-04,
        -4.5625e-04,  4.3176e-05, -1.5910e-04, -1.2033e-04, -7.3625e-04,
         8.4274e-04,  3.1591e-04,  2.1930e-03,  6.8337e-04,  2.6921e-04,
        -1.5528e-04, -3.2039e-04, -8.3139e-04,  3.3363e-04,  4.5176e-05,
         7.1526e-04, -2.1499e-04,  7.7455e-04, -5.7046e-04,  2.6458e-06,
         4.5379e-04,  3.8907e-04,  7.6774e-06, -5.8113e-04,  1.9662e-05,
         7.0306e-04, -3.2463e-04, -2.7860e-04,  7.4011e-04, -3.7155e-04,
        -5.8892e-05, -2.1614e-04, -6.4639e-04, -6.0415e-04, -1.7343e-04,
        -2.8671e-04, -7.2144e-04,  1.1973e-04, -3.2028e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5162e-01,  8.4617e+00,  3.5225e-01, -7.0985e-02, -1.3754e-01,
         9.9679e-02, -3.6118e-02,  1.4793e-02, -6.7284e-02,  2.7977e-02,
        -9.1070e-03,  1.2870e-03, -2.7106e-02,  4.0564e-03, -3.4528e-03,
         3.4071e-04,  1.0827e-01,  9.3527e-03,  3.4816e-03,  3.1094e-02,
         1.0890e-02,  3.4026e-03, -1.2276e-02,  4.4733e-03, -1.3869e-02,
        -5.8547e-03, -2.3444e-02, -2.0382e-02,  1.1699e-02, -1.6283e-02,
         2.0266e-02, -2.5959e-02,  4.6520e-03,  5.6147e-02,  2.3153e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8707e-03,  7.3149e-01, -5.6152e-03,  1.4416e-03,  1.3802e-03,
        -2.5151e-03,  9.9986e-04,  2.1417e-03, -3.3257e-03, -5.8301e-04,
        -3.5780e-03,  1.1113e-03, -8.7619e-04,  3.2539e-04,  3.8545e-03,
        -2.6856e-03,  1.8806e-03,  6.3141e-04, -3.7836e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2459e-02,  8.0021e-01, -8.2727e-03, -4.1431e-03,  3.7095e-04,
         6.4061e-03,  8.3072e-04, -1.3634e-03,  2.5657e-03, -6.0149e-03,
        -4.7711e-03,  3.0207e-03,  2.1347e-03, -1.5678e-03,  1.1621e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9954e-03,  6.9380e-01, -1.3322e-02,  2.4182e-04, -5.3536e-03,
        -3.5022e-03, -2.8753e-04,  3.3852e-03, -9.0616e-04, -1.1663e-03,
        -2.3953e-03,  5.9566e-04,  1.6375e-03, -3.1977e-03,  2.3455e-03,
         1.3420e-03, -2.7146e-03, -2.1122e-03,  2.1859e-03,  5.4906e-03,
        -1.1655e-03, -6.0734e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1146e-02,  9.9994e-01,  7.6381e-03, -1.1186e-04, -3.4439e-03,
        -1.5662e-03, -1.5505e-03,  7.9617e-04, -1.1857e-02,  5.2713e-04,
        -4.6023e-03,  3.0184e-03, -1.0403e-03, -2.2356e-03,  5.5316e-03,
         1.4436e-03,  2.9293e-03,  7.2814e-04, -4.6199e-04, -1.0563e-03,
         4.5839e-03,  1.7952e-03,  1.3601e-03,  5.4152e-04,  6.7915e-03,
        -2.0154e-03,  6.7754e-04, -3.3365e-03,  1.1792e-03, -5.4548e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0876,  6.9444, -0.2841, -0.0524, -0.0250, -0.0099, -0.0255, -0.0594,
         0.0179, -0.1110, -0.3822,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1383e-03,  7.3287e-02, -7.1058e-05, -2.3657e-04,  9.8116e-04,
        -4.2457e-04,  2.7804e-05, -2.9735e-04,  4.4456e-04,  1.6007e-05,
         4.8999e-04,  1.4351e-04,  1.8276e-04,  2.5914e-04,  2.4821e-04,
         1.9670e-04, -2.1087e-04,  1.4661e-04,  2.8658e-05, -6.9317e-05,
         2.4700e-04, -5.8298e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2268e-02,  1.0763e+00,  2.1378e-02, -6.0756e-03,  2.7212e-03,
         1.0350e-02,  5.8608e-04,  5.4713e-03, -1.3081e-03,  1.2654e-03,
        -4.5455e-03,  5.6633e-03,  1.4764e-03, -4.8264e-03,  2.2048e-03,
        -1.1328e-03, -1.1946e-03,  3.5986e-03, -1.7202e-04, -8.3468e-04,
        -3.5028e-03, -2.8910e-03, -1.0544e-02, -1.7394e-03, -1.2402e-04,
        -1.5041e-03,  1.9207e-03,  6.5876e-03, -2.6415e-03, -7.7086e-04,
         7.9244e-03, -7.0538e-03, -4.0676e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9572e-03,  8.4747e-01,  1.6600e-02,  1.4516e-02,  3.7865e-03,
         5.6153e-04,  4.1839e-03, -6.1683e-03,  1.3838e-03,  3.9973e-04,
        -1.1328e-03, -2.8149e-03,  1.3145e-03,  3.6258e-03, -3.4850e-03,
        -7.4735e-04,  4.1272e-03, -3.0475e-03,  1.6139e-04, -5.2930e-04,
        -2.4838e-04, -2.0146e-03,  3.5205e-03,  2.4206e-03, -1.9056e-03,
        -5.4059e-04,  2.9703e-03,  3.4583e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3658e-02,  2.8270e+00,  7.9929e-03,  2.7595e-03, -1.4364e-03,
         1.6815e-03, -4.1626e-03,  1.6106e-02, -6.4526e-02, -3.9351e-03,
        -9.4643e-03,  6.7620e-03, -1.5710e-02, -2.1319e-03, -3.5313e-03,
        -8.9497e-04, -2.7311e-03, -1.9499e-02, -1.6470e-04, -3.4938e-03,
         1.4718e-03, -1.0878e-02, -1.4103e-02,  6.6374e-03, -1.9077e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 2.2620e-04,  4.8243e-01,  6.3098e-03,  6.0906e-03,  4.7698e-04,
        -1.9577e-03,  2.4775e-03,  3.5939e-04,  8.4455e-04, -1.5482e-03,
         3.8378e-03,  1.4245e-03,  2.4257e-04,  3.0376e-04, -3.0545e-04,
        -7.1255e-03, -6.2330e-04,  7.0497e-04,  3.1441e-04, -3.1823e-03,
        -1.3499e-04, -6.0786e-04,  4.0104e-03,  6.8041e-04,  5.7253e-04,
         4.3622e-05, -7.3043e-04,  9.1582e-05,  1.5561e-04,  1.4195e-03,
        -2.3485e-03, -5.7523e-04, -2.0872e-03, -7.3755e-04, -1.1500e-03,
         6.3358e-04, -2.5976e-04,  7.4946e-04,  4.8344e-04, -1.5986e-03,
         5.3533e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5177e-03,  7.8254e-01,  9.0237e-03, -1.0087e-02,  2.2931e-04,
        -5.7538e-03, -8.0645e-04, -9.3256e-04, -2.3685e-03, -4.4872e-04,
         1.6627e-03, -1.9584e-03,  1.3209e-03, -7.3746e-04,  6.7450e-03,
        -1.8809e-03, -6.7008e-04, -2.1927e-03,  9.1386e-04,  2.7812e-04,
        -4.2349e-03, -4.1334e-03,  4.8810e-04, -5.6500e-03,  3.4706e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0206e-01,  3.1361e+00, -3.7775e-02, -9.8296e-03,  2.3043e-02,
         3.6400e-02, -2.9415e-02,  1.7974e-02,  1.2887e-02, -4.8124e-03,
         1.1835e-02,  2.6631e-02,  3.9121e-03, -8.1562e-03,  1.3461e-02,
         6.4698e-03,  2.7244e-02,  9.4048e-04, -1.5790e-02,  1.3308e-02,
         1.8689e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8886e-03,  2.4095e-01,  5.5511e-03, -2.3049e-03, -1.3625e-03,
        -7.6627e-04, -1.5282e-04, -7.3105e-04, -1.9802e-03, -2.3962e-04,
         3.0657e-03, -3.2643e-03,  2.6236e-03, -1.1686e-03, -1.1022e-03,
        -4.3613e-04, -7.9444e-04,  4.9259e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.2399e-04,  3.1192e-01,  1.9480e-03,  1.6484e-03, -1.1232e-04,
         1.0831e-04,  6.0080e-04,  8.1442e-05, -8.3441e-04,  5.0543e-04,
         1.1849e-03, -9.3034e-04,  2.8169e-04,  2.4298e-04,  4.5773e-04,
         2.3343e-03, -4.8729e-04,  9.9533e-06,  9.0308e-04, -1.2604e-03,
         9.1835e-04, -4.2387e-05,  6.9368e-04, -4.2238e-04,  5.7311e-04,
        -1.7867e-03, -7.8005e-04, -6.1668e-04, -4.0308e-04, -1.3504e-03,
         2.4011e-04,  2.5823e-04, -1.0166e-03, -3.9201e-04,  2.0892e-03,
         5.8793e-04, -7.6614e-04, -3.9948e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2939e-03,  6.6517e-01, -1.1441e-02,  2.8875e-03,  3.2176e-03,
        -2.1726e-03, -4.7108e-03,  5.1055e-04,  1.4210e-03,  2.8341e-05,
        -1.1161e-03, -9.1713e-04, -3.2576e-04,  3.9325e-03,  3.2591e-03,
         4.5331e-04, -2.2558e-04, -4.1431e-03,  3.6359e-04, -7.6131e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0179e-02,  6.0690e+00, -1.2639e-02,  7.1529e-02,  1.5736e-02,
        -5.2734e-02,  3.9013e-02, -4.0560e-03,  1.4974e-03,  7.2428e-02,
         6.7417e-02, -2.4511e-02,  1.2358e-02,  4.8429e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3842e-02,  1.1485e+00, -9.1414e-03, -5.4047e-04,  6.4581e-03,
         6.5749e-04,  7.9099e-04, -9.6290e-04,  3.3733e-03,  2.3896e-03,
         2.7721e-03, -5.7740e-03,  4.6456e-03,  1.2223e-04,  3.6135e-04,
        -4.4973e-03,  4.8668e-03,  2.9217e-03,  3.9805e-03, -1.8175e-03,
        -6.1815e-03,  5.2798e-03,  7.9188e-04,  3.5017e-03,  7.6776e-04,
        -5.6738e-03, -1.2788e-03,  6.9307e-05,  7.0202e-04,  9.9315e-04,
        -1.7692e-03, -4.1605e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7614e-01,  7.9225e+00, -4.1924e-02,  2.3696e-01, -3.5534e-02,
        -2.5553e-02,  3.9735e-02, -1.1605e-01,  3.0466e-02,  2.0746e-02,
        -8.7651e-03, -2.5451e-02,  6.3886e-03,  3.3838e-02,  9.8602e-02,
        -8.2080e-03,  5.3308e-03,  2.3469e-02,  4.7591e-03, -1.5265e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9447e-03,  8.8671e-01,  1.9276e-02,  6.9228e-03,  1.2063e-03,
         1.3617e-03,  1.5720e-03, -4.0161e-03,  2.9703e-03, -6.1535e-03,
         1.3972e-03,  2.5382e-03, -7.9904e-05, -1.1028e-03, -1.5073e-03,
        -5.1182e-03, -1.0684e-03,  5.9942e-04, -4.1354e-03, -1.0038e-03,
         7.1868e-04, -2.7563e-03, -3.5064e-03,  2.8253e-03,  1.2488e-03,
        -3.4153e-03, -6.1885e-03, -5.7617e-04,  7.6485e-04,  3.4461e-03,
        -1.8653e-03,  1.6287e-03, -1.7038e-03,  3.9644e-03, -3.6864e-04,
         1.6365e-03,  2.4358e-03, -2.9241e-03,  1.0969e-03,  2.2808e-03,
         1.3729e-03,  3.7106e-04,  4.6456e-03, -5.0831e-04, -1.0922e-03,
        -2.9656e-03,  2.9054e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5916e-03,  1.0423e+00, -1.6999e-02,  5.6675e-03,  9.7648e-03,
         1.3364e-03, -5.7671e-04, -3.8868e-03, -2.3653e-03,  5.7633e-03,
        -8.7228e-03, -5.5074e-03, -4.1959e-04,  4.1220e-03, -2.8425e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3418e-02,  7.1742e-01,  5.5130e-03,  2.7174e-03, -1.4782e-03,
        -2.4023e-03, -4.0370e-04, -1.3540e-03,  3.4582e-03,  3.4120e-03,
         2.5591e-03,  1.2401e-03,  1.2936e-05,  5.1710e-03,  5.6859e-04,
         1.1783e-03,  2.4196e-03,  8.7352e-04,  6.4994e-03, -7.0614e-04,
         7.2817e-04, -4.5063e-03,  6.0497e-04,  4.1864e-03,  4.1236e-04,
        -2.0580e-03,  3.0561e-03,  4.3299e-03,  4.4689e-03, -1.8246e-04,
        -9.5495e-04, -1.0240e-03,  2.9935e-04,  7.3810e-04,  4.9404e-04,
        -1.0930e-03, -9.0157e-04, -7.6653e-04, -2.8160e-03,  8.2819e-04,
        -9.2570e-04, -4.5840e-04, -6.6450e-04, -1.3295e-04,  2.5519e-03,
        -5.2063e-04,  2.9987e-03,  1.0332e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 7.4351e-02,  2.8587e+00,  2.6276e-03, -8.8736e-03,  2.5708e-02,
        -3.9124e-02,  1.4292e-02, -2.7914e-03, -3.8731e-03, -2.9023e-02,
         3.5134e-03, -2.0129e-02, -1.5281e-02, -7.6356e-04, -1.8822e-03,
        -1.2527e-03, -6.5773e-02, -7.5221e-03,  2.2280e-03, -8.3580e-03,
        -1.7505e-02, -5.3288e-04, -3.7295e-04, -5.4954e-03,  1.7378e-02,
        -2.7036e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4660e-02,  1.5692e+00,  1.0044e-02, -2.0261e-03,  7.2307e-03,
        -9.7866e-03,  5.5378e-03,  1.2759e-02, -4.0189e-03,  2.4289e-03,
         1.0949e-02,  1.8494e-03,  3.2386e-04,  2.8506e-03,  1.8723e-03,
        -2.3352e-03, -3.4247e-03, -5.8353e-03,  1.2219e-03,  1.0776e-02,
        -3.5117e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0775e-03,  2.5749e-01, -1.8783e-03, -6.0789e-03,  8.4996e-04,
        -3.5819e-04, -2.6806e-04,  1.5901e-04,  1.3605e-04, -2.8297e-04,
         1.9663e-04,  5.4494e-04, -8.6021e-04, -1.2598e-03,  1.5554e-03,
        -6.3916e-04, -5.2069e-04, -7.1407e-04,  2.5374e-04, -7.9871e-04,
         7.9128e-04, -7.1525e-04,  1.7369e-05, -9.0620e-05,  2.4277e-04,
        -2.1742e-04,  7.0661e-04, -3.6441e-05, -7.0678e-05,  2.1539e-04,
        -3.0987e-04, -5.5870e-05,  3.6648e-04, -9.1806e-04, -1.2216e-03,
        -8.0878e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1989e-03,  1.8565e-01, -2.5862e-03, -2.0468e-04,  2.1289e-03,
         1.9146e-03, -1.9469e-04,  6.1930e-04,  9.4202e-04,  9.9687e-04,
         8.6169e-04,  4.7885e-04, -8.7431e-04, -1.4634e-03,  1.3063e-04,
         7.8693e-05,  5.0191e-04,  3.2200e-04,  1.0308e-03, -1.2694e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8513e-02,  1.2880e+00,  7.5199e-03,  6.2571e-03,  5.8402e-03,
        -1.2974e-03, -2.4991e-03,  1.4987e-04, -1.9453e-03, -3.7398e-03,
         7.4773e-04, -6.1310e-03, -8.1527e-03, -8.7478e-03, -9.7421e-05,
        -2.2347e-03, -5.2858e-03, -5.1468e-03, -1.0211e-02, -6.2685e-03,
         3.8595e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2742e-01,  1.1619e+01, -1.7111e-01,  8.7614e-02,  3.5845e-02,
        -4.5178e-02, -6.7568e-02, -7.7261e-02,  4.7445e-02,  9.3230e-03,
         6.2079e-03,  5.9260e-02, -9.0835e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2422e-03,  4.9001e-01,  5.2325e-03, -1.3699e-03, -6.6240e-04,
        -1.2114e-03, -4.5021e-03, -5.7592e-04, -3.4241e-03, -1.8029e-03,
        -1.9157e-03, -1.5776e-03, -1.7897e-03, -3.8126e-03,  2.7880e-03,
        -7.7991e-05,  9.3948e-05,  2.8525e-04, -7.9846e-04, -8.3854e-04,
         2.2792e-03,  2.6282e-04, -3.2053e-03, -1.3583e-03, -6.8453e-04,
        -2.1806e-03, -2.8997e-03, -1.7802e-03, -1.4656e-04,  1.6206e-04,
        -9.4891e-05, -4.7222e-04, -9.2991e-04, -1.7942e-03,  1.5568e-03,
         3.2027e-03, -1.0290e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6781e-02,  4.0593e+00, -2.4816e-02, -2.3767e-02,  2.5067e-02,
         6.9132e-03,  7.1275e-03,  7.6292e-02, -7.9077e-03, -2.0142e-03,
         4.7437e-03,  1.5568e-02,  6.9346e-03,  1.4041e-02,  2.1727e-02,
        -6.3409e-03,  3.9361e-02,  6.8602e-03, -4.9545e-03,  2.4096e-02,
         1.0307e-02, -3.5987e-03,  8.6624e-02, -1.1577e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8329e-02,  3.8308e+00,  1.0273e-01,  1.0945e-02,  6.7625e-03,
        -1.1956e-02, -1.8439e-03,  5.9598e-03,  2.0162e-02, -6.5946e-03,
        -1.8458e-02,  2.3341e-04, -1.2164e-02,  2.2800e-02,  1.3064e-02,
        -1.6281e-02,  1.9712e-02,  2.4515e-02,  1.0278e-02,  2.5013e-02,
         2.4198e-03, -8.8872e-03,  1.6420e-02, -3.5234e-03,  4.2017e-03,
         4.2579e-04,  7.0448e-03, -2.4504e-04, -9.9023e-03,  5.9760e-03,
        -1.9960e-03, -1.0431e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3982e-02,  2.1389e+00, -1.2324e-02, -1.8093e-02, -1.1607e-02,
        -6.7780e-02,  2.4333e-03, -1.5184e-02, -3.1518e-03,  5.6630e-03,
         3.1434e-03, -1.4785e-03, -3.5501e-03, -9.6910e-04, -4.0708e-03,
        -3.1478e-03, -6.1986e-03, -5.4824e-04,  1.8244e-03, -4.5288e-03,
        -2.1579e-03, -2.2642e-02, -3.7957e-03, -5.2980e-03, -1.1375e-02,
         1.2666e-02,  3.1780e-03, -9.2285e-03,  8.9609e-03,  1.4776e-02,
         8.6351e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4912e-03,  1.5483e+01, -1.3853e-01,  8.1278e-02,  5.2722e-02,
         6.4826e-02,  1.6390e-01, -1.3394e-01,  2.3802e-02,  2.6548e-02,
         3.8485e-02,  4.3046e-02, -3.9516e-02, -9.0157e-02,  1.0940e-01,
         2.9545e-02,  3.4524e-02,  8.4947e-02,  2.5875e-02, -1.2853e-02,
         1.1311e-02,  5.9601e-02, -1.1270e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2067e-02,  4.3385e+00, -1.2386e-01, -1.3535e-02,  6.4534e-04,
         1.5983e-03, -2.5138e-02, -1.1024e-02, -4.3509e-03,  7.0783e-03,
         1.0065e-02, -7.5276e-04,  3.2064e-03, -5.5590e-03, -2.2105e-03,
        -1.5846e-03, -1.8141e-02, -2.1885e-02, -1.4336e-02,  6.6391e-03,
        -1.7668e-02, -2.8102e-02,  1.5242e-02, -5.0542e-03,  6.9145e-03,
         3.1093e-03,  4.5499e-04, -8.4872e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-7.1484e-02,  6.3562e+00,  1.9701e-02,  2.5044e-02, -3.3249e-02,
        -4.4818e-02,  2.6646e-02, -6.2973e-03,  1.6678e-03, -1.6423e-02,
         5.1884e-03, -2.8673e-02,  1.8580e-02,  1.9782e-02, -1.4788e-02,
         2.0066e-02, -7.3070e-03, -3.9542e-02, -2.2469e-02,  5.4729e-02,
         3.8266e-02, -9.5680e-03, -6.1982e-03, -2.0461e-02,  7.2342e-03,
         4.2871e-02,  8.2728e-03,  1.3858e-03, -2.5785e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2244e-02,  1.5944e+00, -2.0161e-02,  2.0787e-03, -6.9248e-03,
         2.9707e-03,  5.1128e-03, -2.9456e-03,  1.9970e-03, -2.3747e-03,
        -1.0025e-02, -6.7832e-03, -4.3963e-04,  4.9341e-03,  3.8811e-03,
        -1.8142e-03,  3.3931e-03, -4.3372e-03,  9.3072e-03, -1.1614e-03,
         1.5106e-03,  1.7215e-03, -1.1340e-02,  1.2204e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4106e-03,  6.6359e+00,  1.3257e-01, -6.5962e-02,  7.8481e-02,
         4.9331e-02,  6.1852e-04, -1.1088e-02, -1.1852e-02,  1.9527e-02,
         3.9377e-02, -1.7549e-02,  2.7257e-02, -1.3616e-02, -5.8965e-03,
         6.3648e-04,  1.1374e-04,  1.1793e-02,  7.1655e-03,  1.1238e-02,
        -1.5448e-02, -6.5990e-04, -2.3986e-03,  3.7426e-02, -1.7179e-02,
        -1.7967e-02, -6.6806e-02,  4.5617e-03, -7.5017e-03,  1.8097e-02,
         1.2553e-02, -3.1453e-02, -3.6698e-02,  2.0284e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9490e-02,  2.8005e+01, -4.2448e-01,  1.9834e-01,  8.2200e-02,
        -9.5635e-02,  1.4136e-01,  5.4032e-03,  1.1682e-01,  1.3686e-02,
         4.3760e-03,  5.3010e-02,  3.8748e-02,  2.5575e-02,  4.7685e-02,
        -1.6715e-03, -7.6977e-02,  2.5298e-02, -1.0130e-02,  5.2596e-02,
         2.0666e-01,  1.5091e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5250e-02,  1.2728e+00,  8.4431e-04,  6.5457e-03,  6.6356e-03,
         1.0062e-03,  2.1217e-03, -8.0014e-04, -2.5995e-03, -6.1030e-04,
         2.1396e-02, -2.7706e-03, -6.0259e-03,  2.4874e-03, -9.1012e-04,
        -2.7809e-03,  7.7412e-03,  1.5579e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7273e-03,  1.0681e+00, -3.3401e-03, -5.4983e-04, -1.3812e-03,
        -2.0010e-03,  5.3645e-05, -3.6582e-03, -6.0276e-03, -5.2459e-03,
        -1.9109e-03,  2.1653e-05, -2.1889e-03,  7.2457e-03, -3.0130e-04,
         2.2170e-03, -9.0539e-03, -1.7163e-03, -5.7728e-03, -3.8599e-03,
         5.3901e-04,  2.7849e-03,  1.0034e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3201e-01,  9.5426e+00,  3.4437e-02, -1.0675e-01, -1.2414e-01,
        -1.9768e-02,  8.8110e-03,  5.1804e-03, -9.6311e-02,  1.1038e-02,
         3.4845e-03, -2.5925e-03, -5.0666e-03, -3.8722e-03,  1.9307e-02,
         2.1733e-02, -2.0502e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2458e-02,  1.6226e+01,  2.6670e-02,  5.9817e-02,  2.6975e-02,
        -3.2544e-02,  1.1874e-01, -1.4212e-02,  1.0207e-01, -2.1580e-02,
        -2.6498e-02,  1.6289e-02, -1.3273e-01, -5.1631e-03,  3.5506e-02,
         2.0039e-03, -3.4908e-02,  2.5433e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8471e-02,  1.3648e+00, -3.1269e-02, -1.5002e-02, -6.4756e-03,
        -9.4591e-03, -2.0486e-03, -2.9819e-03, -8.5342e-03,  4.1541e-02,
         5.0718e-03,  1.9716e-03,  3.9694e-03, -2.7322e-03, -1.5894e-03,
        -3.7809e-03, -1.7830e-02,  4.2486e-03,  1.4544e-03,  5.2129e-04,
        -1.8996e-04, -5.3096e-03,  4.6745e-03,  5.1659e-03,  1.0532e-02,
        -2.7512e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0056e-02,  1.5533e+00, -1.4561e-02, -6.2048e-03,  3.7171e-03,
        -6.0200e-03,  5.3983e-03,  9.7274e-03,  2.1669e-02, -2.8727e-02,
         1.8362e-03, -7.6394e-03, -2.9265e-03, -1.7398e-02,  7.4087e-04,
         5.4440e-03, -1.1871e-03, -4.5324e-03,  4.0111e-03, -8.4570e-03,
        -2.6477e-03, -3.6328e-03,  1.6396e-02,  1.4122e-02, -1.5830e-03,
         2.3189e-03, -2.3658e-03,  2.0118e-03, -4.8395e-03,  1.1312e-02,
         2.0534e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1505e-02,  4.6911e-01,  2.5729e-03,  2.2778e-03,  2.7169e-03,
        -3.0182e-03, -1.7381e-03,  5.7222e-03,  3.3352e-03, -4.6105e-03,
        -4.3121e-04, -1.9651e-03, -4.2268e-03,  1.6309e-03, -8.4138e-03,
        -1.1137e-03,  7.1865e-05,  7.0127e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1673e-02,  1.1132e+00,  1.0755e-02,  1.1537e-03,  8.8863e-03,
        -1.3853e-02, -5.6483e-04, -2.4238e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.2393e-01,  7.0812e+00,  5.2277e-02, -4.0051e-03, -8.6148e-03,
         2.1781e-02, -1.7885e-02, -1.0394e-02,  7.4402e-03,  3.5460e-03,
         1.3214e-03, -1.1942e-02,  2.4891e-02,  1.4105e-03,  1.9604e-02,
        -4.2334e-03, -7.4041e-03, -1.4369e-02,  1.0018e-02, -4.0323e-03,
         8.0203e-04,  1.2618e-02, -7.5023e-03, -4.2788e-03, -8.2109e-03,
        -1.2980e-02, -6.7072e-03,  1.3288e-02,  2.6016e-02,  5.0634e-03,
        -1.5495e-02,  1.5715e-02, -7.3748e-03, -8.6823e-03,  1.2799e-02,
        -2.7984e-04, -9.5814e-04,  2.7933e-02,  6.9724e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8173e-03,  1.1738e+00, -1.4730e-02, -4.6995e-03,  1.9013e-04,
         1.8646e-03,  4.1027e-04, -7.3667e-04, -9.3474e-03,  1.0857e-03,
        -2.0941e-03,  5.9695e-04,  1.2837e-02, -4.1590e-03,  2.1943e-03,
        -4.0172e-03, -1.9375e-03, -7.2404e-03, -5.8822e-05, -2.1559e-03,
        -4.6456e-03, -3.9837e-03,  9.9883e-04, -1.5674e-03, -1.1138e-03,
         4.2778e-03, -1.2633e-02, -6.5483e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4911e-01,  6.6701e+00,  2.9167e-02, -5.5010e-02,  2.9702e-02,
         4.4230e-02, -2.0023e-02, -7.6896e-03,  1.0706e-02,  1.9242e-02,
         3.3757e-02,  5.6586e-03,  6.2065e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1205e-01,  2.9633e+01, -4.2328e-01,  1.6434e-01,  7.8291e-02,
         7.0332e-02, -3.9969e-02, -1.6879e-02,  2.2946e-01, -2.1941e-02,
         1.3117e-01, -1.8219e-01, -3.9099e-02,  1.7760e-01, -6.9945e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3045e-02,  4.0079e+01,  9.4797e-01,  1.6123e-01, -7.9656e-02,
         3.8059e-02,  2.2864e-01, -1.2040e-01,  2.4075e-02, -4.3277e-01,
         3.2151e-02,  8.2533e-02, -4.9145e-01, -4.2068e-03, -1.2184e-01,
        -1.6053e-02,  3.4356e-02, -9.1660e-02,  8.4374e-04, -1.1674e-02,
         1.4928e-01,  1.1160e-02, -2.5380e-02,  8.0160e-03, -1.5415e-01,
         7.2766e-02, -6.0389e-02,  1.6459e-01, -1.7231e-02,  1.8364e-02,
         6.3020e-02, -1.0651e-02,  4.6525e-02,  4.8575e-02,  4.1344e-02,
         2.9493e-02, -2.4668e-02,  1.2284e-02,  1.1801e-02,  5.9127e-02,
         2.4179e-01,  6.8760e-03, -6.6597e-03,  1.0426e-02, -8.8320e-02,
         1.1580e-01, -3.3527e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7586e-02,  1.3147e+01, -1.3023e-01,  2.4872e-02, -1.4529e-01,
        -1.9535e-01, -2.8476e-02,  5.5109e-03, -1.3416e-01,  1.1379e-02,
         1.4549e-02,  2.5996e-03, -3.5673e-02, -1.0101e-02, -7.2459e-03,
        -3.1035e-02,  2.1024e-02,  2.7563e-02, -1.8475e-02, -1.4963e-02,
        -1.8977e-02, -2.9893e-02, -4.6480e-03, -4.0241e-02, -4.2748e-03,
         3.8849e-03,  1.0596e-02, -2.1920e-02,  9.3897e-03,  1.2341e-03,
        -4.9231e-02,  2.0592e-03,  4.8217e-02, -6.0219e-03,  1.3904e-02,
        -1.1998e-02, -3.7424e-02, -2.8473e-02, -1.7171e-02, -9.1322e-03,
         3.6930e-02,  1.8418e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2863e-01,  5.3394e+00,  1.2623e-01, -1.9728e-02,  1.1032e-02,
        -6.5038e-02,  1.8840e-03,  1.2565e-02,  1.4974e-02,  5.3827e-02,
        -3.4751e-02,  1.7951e-02, -1.5380e-02, -4.5993e-03,  1.3281e-02,
        -5.6251e-02,  1.2912e-02,  7.7229e-03, -9.9410e-03,  1.1780e-03,
        -7.2337e-03, -1.7782e-02, -4.9345e-02, -3.0918e-02, -4.0442e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1989e-02,  2.7769e+00,  8.0486e-02, -4.3338e-03, -1.0828e-02,
        -1.0225e-02,  1.1831e-02, -2.1750e-03, -1.9122e-02,  4.5601e-03,
         9.1342e-03, -3.1444e-02, -6.4026e-03, -3.4170e-02, -9.9872e-03,
         2.0499e-03,  2.6215e-03,  3.6731e-02, -7.7177e-04,  1.8155e-02,
         1.8287e-02,  1.2497e-02, -2.8317e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5662e-01,  6.3589e+00,  1.1669e-01,  5.9395e-02,  3.6320e-02,
         1.2993e-02,  1.5522e-02, -2.5209e-03, -3.4885e-02,  2.5445e-02,
         2.0568e-02,  2.0655e-02, -1.4810e-03, -1.0551e-02, -5.5351e-03,
         9.4860e-03,  1.5879e-03,  3.4168e-02,  9.5017e-04, -2.1543e-02,
        -1.7644e-02, -4.6598e-02,  4.0986e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0231,  0.5003, -0.0063, -0.0018, -0.0007,  0.0052,  0.0026,  0.0115,
         0.0020,  0.0008, -0.0092,  0.0008,  0.0026,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5327e-02,  2.6706e+00,  6.3735e-02, -3.8169e-02, -5.0425e-03,
        -2.1051e-02, -1.5989e-03, -4.5574e-03, -9.1029e-03,  4.8919e-04,
        -2.6413e-02, -3.4936e-02,  7.2537e-03, -1.1489e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4480e-02,  3.5889e+00,  4.9667e-02, -1.8289e-02, -1.5253e-02,
        -5.2717e-03,  3.6147e-03, -4.5798e-03,  8.4939e-03, -1.3944e-02,
        -7.3727e-03, -1.7371e-03, -4.2260e-02, -1.1573e-02, -6.4675e-03,
        -2.1968e-04, -2.2664e-02,  2.1275e-02,  1.8396e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.2017e-01,  1.4798e+01,  2.6802e-02, -2.2862e-02, -8.2021e-02,
        -9.7198e-02, -1.7595e-02, -1.3371e-01, -1.6508e-02, -3.1108e-02,
        -2.4938e-03, -2.1221e-02, -3.3273e-03,  1.3633e-01, -4.4896e-02,
        -9.1130e-02,  4.6999e-02, -2.0966e-02,  3.4331e-02,  3.0369e-02,
        -2.0910e-02, -2.9744e-02,  9.0229e-03,  6.9561e-02, -6.4877e-02,
        -1.2186e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0073,  0.4179, -0.0045, -0.0052, -0.0008,  0.0016, -0.0030, -0.0028,
         0.0028, -0.0041, -0.0012, -0.0005, -0.0013,  0.0032,  0.0019,  0.0034,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7234e-01,  7.2803e+00,  3.4816e-01,  2.0285e-01,  1.1573e-02,
         1.4158e-01,  1.6818e-02, -3.0063e-02, -4.0614e-02,  4.1479e-03,
        -8.0442e-04, -1.5216e-02,  1.0235e-02,  2.7522e-02,  6.0936e-02,
         7.9705e-02, -9.8477e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0551,  7.7241,  0.1665,  0.0731, -0.0296, -0.0649,  0.0364,  0.1253,
         0.0287,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1131e-02,  1.4405e+00, -1.2518e-03, -1.6247e-02, -1.7622e-02,
         8.9096e-03,  9.3416e-03,  8.4352e-03,  1.0083e-02, -1.3711e-03,
         4.1052e-03,  1.5290e-02,  9.2726e-05,  7.0870e-03, -3.7466e-04,
         5.0174e-03,  8.9673e-03,  5.0224e-03,  2.2748e-03,  5.3216e-03,
         2.1021e-03,  4.4883e-04,  3.3639e-04,  9.2202e-04, -1.5319e-03,
        -7.3792e-03, -8.7249e-04, -5.6173e-03,  8.4569e-04, -2.0595e-03,
         1.0589e-03, -1.0206e-03,  2.6100e-03, -5.1782e-03, -6.0320e-04,
         2.3282e-03, -5.6204e-03, -8.7578e-04, -9.0233e-03, -4.3371e-03,
        -4.0746e-03, -1.0803e-02, -6.9342e-03, -4.8733e-03,  1.1810e-02,
         1.8766e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5120e-03,  3.8596e-01,  3.8642e-03, -1.3727e-03, -2.7587e-03,
        -5.7574e-04, -2.0663e-03, -3.2907e-05, -1.4994e-03, -1.4758e-03,
        -1.5316e-03, -2.3877e-04, -3.7859e-03, -1.7223e-03,  3.1409e-04,
        -7.8337e-04,  8.2320e-04, -8.7880e-04, -1.7475e-04,  5.1377e-04,
         2.4708e-03, -3.1095e-04, -1.7908e-03, -5.1645e-06, -7.1980e-04,
        -1.1354e-03,  6.5316e-04, -1.3100e-04, -5.9892e-04,  1.7463e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9920e-01,  1.9987e+01,  2.3412e-01,  1.7598e-01, -1.5754e-02,
         3.0871e-02,  1.4058e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4800e-02,  3.6789e+01,  1.2792e-01, -1.8387e-02, -5.3132e-02,
         1.8776e-03,  3.0757e-01,  2.7460e-01,  4.6797e-01, -2.4977e-01,
        -1.4787e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7234e-01,  2.0013e+01,  3.9182e-01,  1.5658e-01,  9.9321e-02,
        -1.1867e-01,  8.5139e-04, -8.3280e-02,  1.4808e-01, -1.5525e-02,
        -8.3331e-02,  9.8547e-02, -2.2726e-01,  1.3220e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7276e-02,  3.2511e+00,  1.0490e-02, -1.7695e-03, -9.5274e-03,
        -9.8531e-04,  8.8208e-03,  8.0454e-03, -2.4052e-03, -1.5161e-02,
        -5.6405e-04, -2.0011e-04, -6.4476e-03, -1.2153e-02, -1.2009e-03,
         1.1002e-02,  3.7718e-03,  5.8034e-03,  8.8648e-03, -9.7152e-03,
        -7.3099e-03, -2.0535e-02, -7.2740e-03, -7.3470e-03, -6.1065e-03,
         7.1250e-03, -2.2647e-03,  6.9894e-03,  1.5204e-02,  1.5462e-02,
        -6.3971e-03, -8.5177e-03, -9.8245e-03,  9.2508e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5848e-03,  2.9801e-01,  5.3977e-03,  7.1509e-03, -5.9503e-05,
         5.9481e-04, -1.7175e-03, -1.0091e-03, -7.5243e-04,  1.6696e-03,
        -9.0053e-04,  1.0526e-04,  1.1294e-03,  7.9255e-04, -9.4561e-04,
        -7.7026e-05, -2.3421e-03,  7.2692e-04, -7.9271e-04,  1.4991e-04,
         1.6107e-03,  3.3657e-04,  2.2468e-03,  3.3999e-05,  7.5966e-04,
        -1.4625e-03, -9.6095e-05,  2.8079e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8168e+00,  7.9415e+01,  7.1917e-01, -4.9886e-01,  1.5738e+00,
         4.8979e-01, -1.7454e-01, -1.6464e-01, -2.2275e-01, -2.1866e-01,
        -3.5972e-01, -6.2995e-01, -5.1507e-01,  3.4033e-01,  5.9881e-01,
        -3.0468e-01,  3.1857e-02, -2.9478e-01,  4.0190e-01, -8.6754e-02,
        -1.2807e-01,  1.1951e-01, -7.2692e-01, -3.1077e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 5.8859e-02,  4.9740e+00,  6.4042e-02,  1.0174e-01, -5.1514e-02,
        -7.1891e-03, -1.3110e-02, -1.9942e-02,  1.7184e-02, -1.1232e-02,
        -2.0018e-02,  2.6885e-03,  1.4863e-02,  1.4319e-02,  8.6704e-03,
         2.1017e-03, -3.3464e-02,  1.1643e-02, -9.2870e-03,  3.5859e-02,
        -2.7744e-03,  2.2602e-02, -2.6893e-03,  5.9364e-04,  4.1997e-03,
         4.2518e-02, -5.2683e-03, -3.3926e-02, -1.8874e-02, -1.4271e-02,
         6.7253e-03,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2625e-01,  1.0649e+01, -6.4632e-02,  1.0914e-02, -4.2379e-02,
         1.5294e-01,  2.2479e-02, -4.4067e-02,  6.6315e-02,  9.5034e-03,
         9.7600e-02, -8.0708e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0787e-02,  1.6391e+01, -7.5769e-03, -2.0997e-02,  2.7211e-01,
         1.4193e-02, -7.4054e-03,  4.4016e-02,  1.3478e-01,  2.6061e-01,
         7.0182e-02, -3.1658e-02, -4.4876e-02,  4.6080e-02, -1.0014e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3733e-01,  4.6008e+01, -1.4109e-01,  1.9933e-02, -1.3819e-01,
         5.9319e-04,  2.1678e-01,  1.0670e-01,  1.1336e-01,  7.2073e-02,
         1.4046e-01,  1.7968e-02,  4.3715e-01, -5.3612e-02, -2.3234e-01,
        -7.4501e-02,  5.3439e-02,  2.9086e-02,  8.5918e-02, -2.8104e-01,
        -2.9160e-02, -4.4489e-02, -3.5437e-01, -1.1192e-01,  2.8578e-01,
         2.2408e-01, -4.2974e-02, -5.8998e-02, -2.4973e-01,  2.2656e-02,
         5.3978e-02, -7.6931e-02,  4.5701e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8424e-01,  1.2026e+01,  1.4693e-01, -8.4195e-02,  1.4424e-01,
        -4.1221e-02, -5.8482e-02, -7.0938e-02, -1.0707e-01, -5.5660e-02,
        -1.4622e-01,  1.1527e-03, -1.2777e-01,  7.5534e-02, -1.0118e-01,
         1.8736e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9267e-01,  1.2093e+01,  1.7153e-01,  2.0977e-01,  1.6642e-03,
        -1.0597e-01, -1.2992e-01, -3.7345e-02,  2.2433e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0003,  0.1215,  0.0025,  0.0022,  0.0069,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5660e-01,  9.9191e+00,  1.6687e-03, -5.4826e-02,  2.7996e-02,
         6.9940e-03, -9.6798e-03,  1.5023e-02, -8.0843e-03, -2.2333e-02,
        -8.0370e-03,  1.8811e-02, -4.8000e-02, -1.3103e-02,  1.5193e-02,
        -3.9725e-02,  1.7739e-02,  1.2622e-02,  1.1369e-02, -2.2821e-03,
         8.2297e-02,  7.9355e-03,  1.5976e-04, -9.5507e-03,  1.1709e-02,
         3.7343e-02, -4.7062e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2876,  5.0362,  0.0927,  0.0281,  0.0118,  0.0654,  0.0795,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3930e+00,  5.9521e+01,  2.8580e-01,  1.3149e-01,  2.8594e-01,
         1.2137e-01, -3.5165e-01,  3.8466e-02,  3.8779e-01,  6.2542e-01,
         2.6249e-01, -5.0188e-01, -1.4113e-01,  2.6229e-02, -2.0111e-01,
         3.2119e-02,  5.1740e-02,  3.2596e-01,  1.0835e-01, -3.7253e-01,
         4.3249e-02,  5.2079e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0433e-02,  1.9566e+00, -9.3517e-03, -4.2306e-03, -8.1795e-03,
         1.0616e-02,  1.6628e-03,  1.3382e-02, -5.9467e-03, -1.2845e-02,
         9.5076e-03,  2.0334e-03, -9.7702e-03,  1.5242e-03,  4.2461e-03,
        -5.7974e-03, -3.1418e-03, -3.1076e-03, -7.3861e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8831e-01,  3.9869e+01,  2.6539e-01,  3.3108e-01, -8.9202e-02,
        -2.3748e-03,  1.0308e-01,  1.5055e-01,  2.2942e-01,  1.5629e-01,
         1.5798e-02,  1.3987e-01, -2.3447e-02,  1.6498e-01, -5.6392e-02,
         2.5592e-02,  1.1444e-02,  4.6874e-02,  1.5994e-02, -5.0183e-02,
        -4.5294e-02,  2.0703e-01,  7.4989e-02,  3.5271e-02,  1.8623e-01,
        -4.0655e-02, -8.5234e-02,  1.7876e-02,  2.8293e-02,  4.9213e-01,
        -1.4945e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-6.2126e-02,  1.6420e+01,  1.1782e+00,  1.9708e-01,  6.7629e-03,
         3.1066e-02,  5.1037e-03,  6.8266e-02,  1.0900e-01,  4.8754e-02,
         4.1836e-02,  6.2006e-02, -3.0368e-02,  1.3119e-01,  4.8476e-02,
         9.7203e-03, -1.1502e-01,  5.6418e-02,  2.3637e-02,  2.7716e-02,
        -1.0485e-02, -3.9447e-02,  2.8660e-02, -2.7500e-02, -5.8824e-02,
        -1.0978e-02,  1.4367e-02, -1.3544e-01, -4.6100e-02,  5.7003e-03,
         9.5954e-02,  1.1208e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9908e-02,  1.4950e+01, -7.0778e-02, -2.6321e-02,  9.0048e-03,
         6.7229e-02, -3.5003e-02, -7.6696e-03,  1.3149e-01, -5.0049e-02,
        -1.9522e-01,  3.6414e-02, -1.0451e-01, -5.5490e-02, -6.5689e-03,
        -1.5352e-02,  2.7207e-02, -6.5020e-02, -1.3217e-01, -3.2397e-03,
        -6.9029e-03,  8.9934e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3697e-01,  1.7952e+01,  5.1504e-02,  1.2977e-02,  1.0303e-02,
         7.3613e-02, -3.9677e-02,  2.9497e-02,  4.9186e-02,  1.2931e-02,
         2.3087e-02, -6.4181e-02, -6.5468e-02,  1.1932e-02, -2.0850e-02,
         2.4044e-03, -1.5148e-03,  2.2017e-01, -3.3814e-02, -1.4693e-02,
        -6.9267e-02, -1.0962e-01, -3.2342e-02, -5.5563e-02, -9.5073e-02,
         6.6729e-02, -2.5420e-02, -1.2771e-01,  1.5774e-02, -2.1865e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5199e-03,  2.9802e+00,  2.2672e-02, -8.5404e-03, -5.2287e-03,
        -4.2246e-03,  2.8479e-02,  8.6622e-03, -5.9346e-03, -1.2958e-02,
        -7.8943e-03,  1.5930e-02,  2.6059e-03,  5.1668e-04, -1.2228e-02,
         5.5555e-03, -1.7755e-02,  3.3638e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7640e-02,  1.9912e+00,  2.9630e-02,  1.7905e-03,  1.7616e-03,
        -1.0188e-02,  3.9425e-04,  8.1003e-03, -5.1864e-03,  3.8903e-03,
         4.5452e-03,  8.2223e-04,  4.3538e-03,  2.6911e-03, -1.6787e-04,
        -4.4950e-03,  8.6580e-03, -1.1952e-03, -8.7942e-03,  1.3970e-03,
        -5.4883e-03,  3.5131e-03,  7.7508e-03,  1.8194e-03,  3.3443e-03,
        -3.4666e-03, -2.1945e-02,  1.0260e-04, -1.4144e-03,  1.4900e-03,
        -1.3380e-03, -5.4852e-03, -1.8452e-04, -1.2843e-02,  1.3477e-03,
        -6.2628e-03,  2.9504e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3545e-01,  6.6287e+01, -7.5738e-01,  5.9527e-01,  1.9967e-03,
        -8.7239e-02,  3.1296e-01, -2.3554e-01, -5.4863e-02,  8.6542e-02,
         1.2462e-01, -1.1356e-01, -3.0236e-01,  2.6673e-01,  3.2576e-01,
         4.1962e-02, -2.9933e-01, -2.3418e-02,  6.1824e-02,  8.0075e-02,
        -1.0076e-01, -1.5752e-01,  3.4018e-02, -2.6706e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9841e-02,  2.9584e+00, -1.7936e-03,  2.6031e-03, -7.6226e-03,
         8.3290e-04, -5.3534e-03, -1.4413e-03,  4.2282e-03, -1.3156e-02,
        -5.7002e-03, -4.5301e-03, -7.5512e-03, -1.4575e-02,  4.9334e-04,
         2.1574e-03, -6.6099e-03, -8.0982e-03, -1.1239e-05, -1.9240e-03,
        -5.2771e-03, -1.5892e-02,  1.3917e-03, -9.0844e-03, -1.0414e-02,
         4.7941e-03,  9.8418e-04, -3.0140e-03,  2.0781e-03, -1.3208e-03,
         2.3055e-02,  4.6191e-04,  7.1151e-03,  7.3520e-03,  4.7959e-03,
        -8.1201e-04,  1.3338e-03, -2.5428e-03, -3.0873e-03, -1.9417e-04,
         3.7027e-03, -3.1392e-03, -1.3740e-04,  1.1290e-02, -5.0985e-03,
         7.0514e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4914e-03,  1.6837e+00, -2.9773e-02, -2.0503e-02, -4.2597e-02,
        -4.9021e-03, -2.9259e-02, -5.2635e-03, -2.9538e-03, -2.0424e-02,
        -7.7563e-04,  2.2711e-03, -3.0996e-04, -7.8402e-03,  8.5119e-03,
         2.2209e-03, -1.6931e-02,  4.0997e-02,  1.0134e-02,  7.9960e-03,
         1.2251e-02, -3.5566e-03,  1.4712e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1294e-01,  3.5897e+00, -4.2764e-02,  1.1205e-02,  6.1802e-02,
         1.8028e-02,  3.5634e-03,  4.4449e-02,  5.4843e-03, -2.0983e-02,
         1.9240e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1330,  6.4460,  0.0471,  0.0683,  0.0315, -0.0226,  0.0294, -0.0305,
         0.0188,  0.0271, -0.0454, -0.0401,  0.0579, -0.0160,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3479e-01,  3.3700e+00, -4.7857e-02,  3.0007e-02, -1.3589e-02,
        -2.3596e-03, -2.7651e-02,  1.1329e-02,  3.6550e-03, -6.4401e-02,
         8.9568e-04,  2.0490e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0771e-01,  6.4705e+00,  3.1959e-02,  6.1598e-02,  1.5002e-02,
         3.5563e-03, -3.1431e-02,  3.7490e-03,  6.5076e-03,  4.3014e-02,
         1.6207e-02,  7.0138e-02,  9.1908e-03,  1.5073e-02, -7.2049e-03,
         9.4092e-03, -5.3894e-03,  1.5065e-02,  1.9135e-04, -2.6716e-02,
        -7.0002e-03, -6.6966e-04,  6.5141e-03, -1.5804e-02,  3.4982e-03,
        -2.3564e-02,  2.0518e-02, -1.7117e-02,  4.3003e-03, -8.6105e-03,
         8.6827e-03,  6.7826e-02,  2.4744e-03,  9.1262e-03,  1.0106e-02,
        -1.3446e-02, -5.6519e-03,  5.3202e-03,  5.3217e-03, -1.0752e-02,
        -4.3221e-03,  2.9560e-03, -7.4810e-03, -4.5830e-03,  6.9714e-03,
         1.9450e-02,  4.1750e-02,  3.3864e-03, -3.1737e-03, -2.8978e-03,
         5.4476e-03,  2.3456e-04, -6.9475e-03, -1.6708e-03, -1.6655e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-4.1039e-02,  7.7513e+00,  5.5601e-02, -3.6913e-02,  7.4321e-02,
         2.4352e-02, -2.2931e-02,  4.5010e-02,  2.8578e-02, -9.2974e-03,
        -7.0799e-02, -1.7459e-02, -1.7921e-02,  4.0337e-03, -3.6795e-02,
         8.2535e-03, -4.3865e-02,  7.5655e-03, -1.6375e-02, -5.3432e-03,
         2.1087e-02, -3.6004e-02, -2.0784e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0954e-02,  6.4979e+00, -9.7092e-02,  4.9310e-02,  6.6277e-02,
        -1.3869e-02, -3.8239e-02, -2.1712e-02, -6.1742e-03, -2.1440e-03,
        -4.5435e-02, -2.6914e-02, -2.3241e-02, -1.4181e-02, -1.6206e-02,
        -2.4327e-02, -1.5755e-02,  7.1544e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3518e-02,  3.6595e-01,  3.0529e-03,  2.9894e-03, -2.4844e-03,
         8.2247e-04, -3.6510e-03, -3.4225e-04,  2.2867e-03, -1.2544e-03,
         2.2062e-03,  3.9759e-03, -2.3244e-03,  4.0632e-03,  1.8200e-03,
        -9.2331e-04, -5.1735e-03,  1.7057e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5433e-03,  4.3840e-01,  1.4960e-03, -3.6347e-03, -5.3623e-04,
         2.6379e-04, -4.3952e-03,  1.9519e-03,  1.9346e-03, -8.4065e-04,
        -3.7060e-04, -3.1913e-03,  2.0087e-03,  7.4058e-03, -5.6972e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7819e-02,  1.9082e+00,  1.5928e-02, -2.1259e-03,  1.8022e-03,
        -3.8154e-03, -1.1365e-02, -6.2815e-03, -1.9597e-03, -7.0593e-04,
        -1.1552e-02, -1.1972e-02,  3.0905e-03,  4.2752e-03,  3.8623e-03,
         9.5667e-04, -6.2559e-04, -1.2426e-02, -5.6630e-03,  1.8474e-02,
         7.3195e-03, -4.4772e-03, -1.4597e-03, -6.1186e-03, -1.3852e-03,
         2.7453e-03,  6.9211e-03, -2.9725e-03,  7.3683e-03,  5.4524e-03,
        -1.2641e-02,  7.6753e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4987e-02,  1.3122e+00,  2.2303e-04, -2.2114e-03,  3.1061e-03,
        -1.5083e-03, -6.1532e-03, -6.8381e-03, -6.1599e-03, -5.5144e-04,
        -2.2870e-03, -6.8751e-03, -1.3815e-02, -7.8469e-05, -2.4909e-03,
        -7.8506e-04, -2.5335e-03, -5.3147e-03, -8.4582e-03, -4.6717e-03,
        -4.9105e-03,  2.2870e-03, -5.4815e-03, -4.6622e-03, -1.6411e-03,
        -3.3615e-04,  1.4003e-03,  1.5123e-03, -2.7162e-03, -2.2445e-03,
         2.3104e-03, -4.2542e-03, -4.3144e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7102e-03,  4.8452e-01, -5.2947e-05,  1.0952e-02,  8.0980e-04,
         3.3243e-05,  2.7354e-03, -3.7497e-03,  1.1282e-03,  4.4948e-05,
        -6.7908e-04, -1.3194e-03, -2.4654e-03, -2.6214e-03, -4.4194e-03,
        -1.9452e-03,  5.7816e-04, -3.8783e-04, -1.5856e-03, -4.9355e-04,
        -6.9688e-04, -9.2632e-04,  4.2655e-05,  5.3145e-04, -2.8872e-04,
         7.2755e-04, -1.9660e-03, -1.1021e-03,  5.6631e-04, -1.4470e-03,
         1.0042e-03,  1.4740e-03, -6.8466e-04, -5.2594e-04,  9.6009e-04,
        -3.5397e-03, -3.9605e-04, -6.5279e-04, -9.1127e-04, -8.1755e-05,
         9.7627e-04, -6.9394e-04, -4.1320e-04, -3.2975e-04, -5.4182e-04,
        -1.4027e-04,  2.9859e-04, -2.6011e-04, -1.1598e-03, -8.2080e-04,
         2.7292e-03,  1.7715e-04, -7.4018e-04, -5.5885e-04,  1.3625e-03,
         1.1786e-03, -6.8211e-03, -2.4383e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1127e-02,  1.0554e+01, -8.9759e-02, -1.0504e-01, -1.6083e-02,
        -3.9977e-02, -7.1280e-02,  6.2171e-02, -7.9468e-02, -1.0910e-02,
         5.8976e-02, -3.8804e-02, -4.2003e-02, -8.9313e-02,  1.6596e-03,
        -2.2886e-02,  2.4067e-02,  3.5346e-02, -4.3307e-02,  5.4615e-02,
        -4.3395e-02, -4.2651e-05,  4.0480e-02,  4.4637e-03,  6.7197e-02,
        -7.8318e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9037e-02,  1.7503e+00, -2.3749e-02,  8.9772e-03, -1.7787e-02,
         9.0261e-03,  9.5753e-03,  1.8305e-03,  4.3239e-03, -5.3405e-03,
        -5.3466e-03, -4.9717e-04,  1.6597e-02,  2.1801e-04, -1.3681e-03,
        -2.0752e-03,  3.8591e-03,  6.5330e-03,  6.2801e-03,  1.3905e-03,
        -3.0753e-03, -9.9061e-03, -5.7206e-03,  7.1396e-03,  4.9533e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6276e-01,  2.5248e+01,  2.7297e-01,  2.6435e-01, -5.8504e-02,
         4.1358e-03, -5.3632e-02,  5.1275e-02, -3.5162e-02, -1.0363e-01,
        -7.6842e-02, -2.4505e-02,  3.3097e-02,  1.6750e-01,  3.8192e-02,
        -1.0269e-01, -1.8727e-01,  6.7675e-02,  3.4289e-02,  1.1369e-01,
        -1.8077e-02,  4.1983e-02,  2.4380e-02, -1.0525e-01, -1.2636e-02,
        -9.5714e-02,  9.4377e-02,  3.2936e-03, -3.6205e-02,  3.9797e-02,
         1.0478e-03, -8.2873e-02,  1.2576e-01, -2.3423e-03, -1.0289e-01,
        -1.0404e-01, -2.8514e-02, -8.0751e-02, -1.0753e-02,  3.8361e-02,
        -3.9983e-02, -1.4058e-02,  4.6718e-02,  3.6673e-02, -1.8173e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1009e-01,  4.9858e+00, -2.6921e-02, -1.0786e-02, -7.6325e-03,
         1.3062e-02, -1.7004e-02, -9.6672e-03,  9.0067e-03,  1.4911e-02,
        -3.7354e-02,  6.4793e-03, -8.0792e-03, -1.2936e-02,  4.9094e-02,
        -5.2256e-02,  6.8244e-02, -2.3822e-02,  4.4388e-03,  5.2687e-03,
         2.4740e-02,  7.9654e-03,  9.8497e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0032,  0.1747,  0.0009, -0.0011, -0.0004,  0.0003,  0.0012,  0.0007,
        -0.0034, -0.0020,  0.0018, -0.0030,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 4.5846e-03,  1.1089e+00,  1.5441e-02, -9.0647e-03, -1.3674e-02,
         8.1304e-04,  3.5461e-03, -1.2972e-03,  4.0553e-03, -4.1937e-03,
        -1.5528e-03, -3.2048e-03,  2.3024e-03, -2.9340e-03, -4.6126e-03,
         5.5302e-03,  4.0231e-03,  6.0036e-03, -1.3031e-03,  4.8953e-07,
        -2.1450e-05,  3.4568e-03,  2.3087e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2281e-01,  7.1329e+00, -2.3388e-04, -2.1753e-02, -5.0295e-02,
        -2.9760e-02, -1.1510e-02,  2.0020e-02,  7.1267e-03,  2.5923e-02,
         1.7809e-02, -2.7083e-02, -1.7298e-04, -4.9869e-03,  3.7762e-02,
        -1.6889e-02, -5.7578e-03,  2.2330e-02,  5.6839e-03, -2.7574e-03,
         4.1925e-02, -1.0988e-02, -8.8044e-02, -2.1269e-02,  5.7709e-03,
        -1.3233e-02, -2.5657e-02,  8.8569e-04, -2.8538e-02,  3.6066e-04,
        -1.8069e-04, -3.1758e-02,  8.0574e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3330e-02,  5.9905e+00, -8.7756e-02, -5.7589e-02, -9.6734e-03,
        -2.3550e-02, -1.0478e-02, -8.5284e-03, -2.4682e-02, -4.4196e-03,
         5.0304e-03,  5.3889e-04,  4.5277e-04, -2.1412e-02,  8.4970e-03,
        -1.8750e-02, -1.2034e-02, -2.5357e-02, -4.1992e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.7434e-03,  4.0398e-01, -6.9247e-03, -2.0621e-03, -4.1218e-03,
         4.0799e-03,  4.2601e-03,  2.7537e-03, -1.2711e-03,  5.6737e-04,
        -5.4437e-03, -4.3146e-03, -2.5521e-04, -2.7369e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5632e-03,  1.3600e+01, -3.8676e-02, -3.3640e-03, -8.3219e-02,
         6.6011e-02,  4.6969e-02, -5.0433e-02, -8.7488e-03, -8.2066e-03,
         5.0751e-04,  5.9420e-03, -1.3562e-02,  2.6119e-02, -5.2502e-03,
        -3.3147e-02,  3.0962e-02,  1.2976e-02,  1.1783e-02, -1.0777e-02,
         2.7936e-02,  5.2459e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3736e-03,  2.1781e+00,  1.1461e-03, -5.8656e-03,  4.9325e-03,
        -9.6627e-03,  1.1320e-04, -3.4867e-02,  3.2753e-03,  7.7363e-03,
        -2.4789e-03,  6.5670e-03,  2.5085e-02,  1.9055e-03,  3.9110e-04,
        -4.1810e-03, -9.1277e-03,  7.8772e-03,  4.2055e-03, -4.7956e-03,
        -5.7170e-03,  4.1313e-03,  4.5244e-03, -8.5469e-03, -6.3093e-03,
         2.7028e-03,  6.5828e-03, -1.0061e-02,  1.0675e-03,  5.3666e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8664e-02,  8.2955e-01, -3.4557e-03,  1.4451e-03, -5.7043e-03,
         1.1177e-02,  4.6740e-03, -7.2475e-03,  2.5220e-03,  2.2447e-03,
         3.3421e-03, -3.8011e-03,  8.5716e-04, -3.0337e-04, -4.4625e-03,
         1.6757e-03, -2.3795e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4479e-02,  4.8453e-01,  9.8327e-03, -6.0881e-04, -1.3810e-04,
        -6.5823e-04, -2.7547e-03,  4.4464e-03,  7.5747e-03,  2.8384e-03,
         6.1371e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6609e-01,  2.7965e+01, -5.8291e-02, -1.7306e-02,  1.8620e-01,
         1.4695e-01, -1.0101e-02,  2.7778e-02, -9.6063e-02,  1.1878e-02,
         9.9236e-02,  1.1016e-01,  7.6784e-02,  3.9384e-01,  1.8916e-01,
        -4.2616e-02, -2.8437e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2156e-03,  1.1610e+00, -2.0294e-02,  1.3000e-02, -7.4745e-03,
         1.0191e-03,  1.6924e-03, -3.5590e-03,  1.3343e-02,  1.3207e-04,
         4.1593e-03,  2.0222e-03,  5.4901e-04,  8.1908e-03,  1.2450e-02,
        -1.5768e-03, -1.1918e-03,  1.5054e-03, -9.1262e-04, -1.0787e-04,
        -2.3623e-03, -5.3591e-04,  6.8582e-03,  1.4888e-03, -2.6010e-02,
        -4.5962e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1568e-03,  3.1165e-01,  1.1281e-02,  1.3805e-03, -5.1263e-04,
         3.5838e-03,  5.9425e-04,  3.4370e-03, -7.1987e-04,  1.1695e-03,
        -2.2324e-03, -1.0858e-03, -2.9264e-04, -6.4153e-04, -1.1452e-03,
         1.0627e-03,  2.4123e-04, -9.4343e-04,  2.2269e-04,  1.2675e-04,
         2.4870e-04, -3.7271e-04, -1.0237e-03,  4.1080e-04, -5.2814e-04,
         4.7704e-04,  3.8985e-04, -1.2524e-03, -2.4611e-03,  3.5508e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0800e-02,  1.1548e+00, -6.3153e-03, -1.1173e-02,  3.2405e-03,
        -8.8266e-03, -1.2718e-02, -7.9920e-03, -5.7117e-03, -1.0633e-02,
        -2.0421e-03, -9.1918e-03, -1.7936e-03, -8.6844e-03, -5.1817e-03,
         2.9412e-05, -2.6658e-03,  1.4810e-02,  3.4434e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 3.8445e-03,  3.7431e+00, -2.3992e-03,  6.9582e-02,  1.9234e-02,
        -4.8889e-03,  8.0752e-02,  1.8705e-03, -1.7556e-02,  1.0278e-02,
         2.0261e-02, -5.9659e-04,  5.6678e-03,  3.6475e-03, -4.8553e-03,
         4.0705e-03,  1.4926e-04,  3.9198e-02,  1.0929e-02,  1.4091e-02,
        -1.1302e-02, -7.2097e-04,  1.5664e-03,  1.5509e-03, -4.1791e-03,
         5.5282e-04,  1.0638e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8506e-03,  2.6945e+00, -6.0388e-02,  1.2187e-02, -6.0389e-03,
         5.9045e-03, -4.3071e-03,  7.0661e-03,  6.2820e-03, -7.5043e-03,
        -3.8691e-03,  4.1368e-02, -1.5647e-02, -2.1189e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7732e-03,  1.0034e+00, -1.9612e-02,  8.0399e-03,  7.2488e-04,
        -2.2711e-03,  2.2210e-03,  1.3146e-03, -4.6777e-03, -7.3708e-04,
        -1.2597e-03,  7.0021e-04, -7.8013e-04,  2.0324e-03, -6.3749e-03,
        -2.5207e-04, -4.7378e-04, -4.4075e-03,  3.8956e-03, -7.0927e-03,
        -1.8997e-03, -3.6693e-03, -2.6867e-03, -4.6303e-04, -4.9139e-03,
        -3.6878e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0131,  0.5463,  0.0053,  0.0044, -0.0009, -0.0039, -0.0008,  0.0011,
         0.0028,  0.0007, -0.0021,  0.0042,  0.0066, -0.0007,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2884e-03,  1.0602e+00,  6.5402e-03, -9.3892e-04, -3.2038e-03,
         3.5704e-04,  3.0455e-03,  3.3418e-04,  1.9708e-03,  4.8433e-04,
         3.9944e-04,  2.5702e-04,  4.3000e-03,  3.0658e-04,  1.5798e-04,
         2.9953e-03,  1.1179e-03, -1.1530e-04,  2.2774e-03,  1.0288e-03,
        -5.1666e-04, -1.2082e-03,  1.6217e-03, -1.7807e-03,  1.6420e-03,
         4.9075e-03, -8.4298e-04,  3.7737e-03, -1.9903e-03,  4.3712e-04,
         8.3090e-04,  2.4888e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1543e-02,  2.9698e+00,  1.1132e-02,  6.6372e-03,  1.1639e-03,
         2.2062e-02, -1.5605e-02, -2.4123e-02,  1.4900e-02,  6.1342e-03,
         1.2898e-02,  1.0073e-02, -4.1425e-03, -1.6165e-03, -9.6836e-05,
         8.6634e-03,  5.9435e-04,  4.2984e-03, -2.4621e-03,  1.1179e-02,
         3.1837e-03,  6.5615e-03,  4.0397e-03,  1.9646e-03, -2.0138e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0454,  0.8339,  0.0256,  0.0065, -0.0054,  0.0018,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1328e-02,  7.6002e-01,  1.2234e-02, -4.7414e-03,  9.1989e-04,
        -3.2066e-03,  3.7546e-03, -3.1150e-05, -8.3034e-04, -6.1068e-04,
        -1.3982e-03,  1.3261e-04, -9.8638e-04,  9.7814e-03, -3.0687e-03,
        -3.6811e-03,  1.4851e-03, -3.5515e-05,  1.7034e-04,  1.1212e-04,
        -3.4777e-03,  1.9175e-03,  2.7282e-04,  9.0996e-04,  1.0109e-03,
         1.5195e-03, -3.0748e-04,  2.4952e-03, -2.2469e-04,  3.2220e-03,
         1.6783e-03,  6.7284e-04, -1.8595e-04, -2.3465e-03,  2.2996e-03,
         6.1803e-04,  8.0553e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4033e+00, -1.0966e+02,  3.6917e-01,  1.4316e-01,  7.5607e-02,
         3.1769e-01,  1.7160e-01,  2.1217e-01,  2.8391e-01, -6.2237e-02,
         1.4781e-01,  1.5271e-02,  1.8141e-02, -1.3875e-01,  1.3427e-01,
         4.2865e-01,  1.3179e-01, -3.7527e-01,  4.5714e-02, -3.9069e-01,
         4.6824e-01,  1.9424e-01,  7.9604e-01,  2.4361e-01,  2.9350e-01,
        -9.7006e-02,  5.5848e-01, -1.5143e-01, -1.5136e-02, -3.8723e-01,
        -8.7179e-02,  3.4659e-02,  2.0069e-01, -1.7835e-01,  2.4703e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8859e-02,  7.2435e+00, -3.2610e-01, -1.2631e-01,  1.4780e-02,
         1.1194e-02, -4.1232e-02, -4.2969e-02, -1.9415e-02,  5.4840e-02,
         7.1306e-03, -3.6191e-02,  1.1079e-01,  8.7757e-02, -7.1258e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6024e-01,  5.2477e+01, -1.7547e-02,  1.2097e-01,  2.7495e-01,
         1.3429e-01,  1.0379e-01,  1.6113e-01,  6.1436e-02,  2.1648e-01,
         6.4890e-04,  1.7752e-02, -1.9399e-02, -5.4450e-02,  4.6793e-03,
         5.6418e-03,  1.0385e-02, -2.4497e-02,  1.4016e-01,  4.4406e-02,
         2.1835e-02,  8.8972e-02, -3.6555e-02,  2.7926e-02, -1.2301e-01,
        -7.4645e-02,  1.0714e-02,  9.9324e-02, -2.8322e-02,  5.7847e-02,
        -2.5866e-02, -1.0659e-01, -7.3750e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1337e+00, -8.6921e+01, -1.0826e-01,  4.8164e-01,  4.0823e-01,
         1.1233e+00, -7.5583e-02, -6.3120e-02,  6.4162e-01,  4.0504e-02,
         8.0319e-03,  2.1469e-01,  4.0312e-01,  7.7552e-01, -2.4440e-01,
         7.6577e-02, -7.3869e-02,  1.7579e-01, -1.1746e-01, -1.2415e-01,
         6.4940e-02,  1.2607e-01,  1.0664e-01,  5.2277e-02, -8.8291e-02,
         2.9723e-01, -1.2347e-01,  5.1878e-01,  3.4098e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1032e-01,  3.8864e+00,  1.4539e-03, -3.4084e-02, -1.0229e-01,
         9.9417e-03, -9.1870e-04,  2.0803e-02, -8.7862e-04, -1.2387e-01,
        -1.0688e-02,  1.7834e-02, -4.2602e-03,  2.4184e-02,  8.0279e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2413e-01,  1.2598e+01, -9.0950e-02, -2.8838e-02, -5.2116e-02,
         7.4563e-03,  3.7132e-02,  8.2269e-03,  3.8991e-02, -1.2242e-02,
         5.8717e-03,  3.2695e-03,  2.0380e-02,  2.6455e-02,  6.0025e-03,
        -5.3941e-03,  1.5237e-02,  6.7834e-03,  2.1353e-03,  3.6688e-02,
        -4.7485e-03,  9.3033e-03,  9.2977e-03,  1.2405e-02,  7.9807e-03,
        -1.1982e-02, -3.2002e-02,  3.4431e-02, -5.7238e-02,  5.4309e-03,
         1.7417e-02, -5.6503e-03, -1.3881e-04, -1.4589e-02,  2.0584e-02,
        -3.1699e-03,  2.8167e-02, -4.2084e-03,  2.5226e-03, -2.2026e-02,
        -1.7358e-03,  4.2033e-02,  7.5241e-02, -6.7601e-03,  1.1066e-03,
        -3.3014e-02,  8.8301e-04, -6.0837e-02, -1.1877e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4815e-01, -1.4541e+02, -5.0836e-01, -6.6336e-02, -9.9945e-01,
        -4.8836e-01,  4.9138e-02, -3.4648e-01,  2.4994e-01, -4.4273e-01,
         2.3878e-01, -4.4839e-02, -3.9529e-01, -2.2452e-01,  9.3056e-02,
         4.0492e-01, -2.9810e+00, -1.0626e-01, -3.9372e-01, -7.8660e-02,
        -1.1424e-01, -9.2596e-02,  7.6013e-01, -2.7932e-01, -7.6438e-02,
         3.5418e-01,  3.8683e-01,  1.5358e-01, -5.7153e-02, -7.3674e-02,
        -1.0871e-01,  2.2747e-02, -9.2328e-02, -1.0811e-01,  7.6220e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5889e-03,  5.6404e-01,  6.9990e-04,  3.2679e-03,  2.0173e-03,
         2.0653e-03, -3.0087e-03,  8.9480e-04,  1.5097e-04, -9.2978e-04,
        -2.4989e-03, -1.1435e-03,  1.8015e-03,  4.8063e-04,  7.3481e-04,
        -4.5463e-03,  1.9218e-03,  5.4710e-04,  5.7331e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5626e-02,  4.0955e+00, -5.7642e-02, -7.0534e-02, -2.7951e-02,
        -1.8134e-02, -3.3258e-02,  3.9522e-04,  3.0330e-02, -2.4483e-02,
        -2.0498e-02,  4.5660e-03,  1.0435e-02, -4.1432e-02,  4.2247e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8189e-04,  4.2642e-01, -4.2903e-03,  5.0388e-03, -2.6215e-03,
         9.8891e-04,  2.4038e-03, -5.2865e-04,  9.5053e-04, -1.8182e-03,
        -1.4448e-03,  3.8002e-04, -2.2944e-03, -1.7460e-03, -1.8469e-03,
         1.4056e-03, -4.1410e-04, -5.9343e-04,  2.8111e-03,  1.5994e-04,
         1.2720e-02,  1.0386e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7707e-03,  6.1704e-01, -4.7120e-03, -5.8113e-03, -8.9292e-04,
         3.9094e-03,  7.6092e-04,  2.0331e-03, -5.9700e-03, -8.3986e-04,
        -1.0527e-03,  1.0593e-03, -3.4136e-03, -2.4709e-03,  1.0953e-03,
         3.4031e-04, -5.5935e-05,  2.0769e-03,  4.8606e-04,  1.9386e-04,
        -3.5558e-04, -2.1637e-03,  5.1192e-04,  1.2771e-03,  1.0041e-03,
         2.4050e-03, -7.1584e-04,  2.0755e-04, -1.2204e-03,  3.6018e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0028,  0.8006, -0.0104, -0.0129,  0.0130, -0.0039,  0.0051, -0.0178,
        -0.0094, -0.0077, -0.0034,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4882e-03,  2.9578e-01,  3.7701e-03,  3.5653e-04,  9.2614e-04,
         1.5643e-03,  1.5691e-03,  8.0837e-05, -1.2960e-04,  8.5789e-04,
         6.2093e-04,  3.9265e-04,  6.6425e-04, -6.6742e-04, -4.2606e-04,
         8.0403e-04,  6.1971e-05,  4.6750e-04, -7.9409e-04, -1.1011e-03,
         1.8914e-03, -2.4225e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9633e-01,  1.9759e+01, -5.3741e-02, -2.9596e-02,  7.7842e-02,
         2.9110e-01,  5.9467e-02,  3.3257e-02,  3.9752e-02,  2.2569e-02,
         3.3038e-02,  5.7138e-02,  1.3355e-01, -7.2653e-02, -3.5393e-02,
        -1.5530e-02,  1.4722e-02,  5.5225e-02, -2.8579e-02,  3.7777e-02,
         2.6463e-03, -3.3973e-02, -1.7088e-02, -2.0911e-02,  6.6102e-02,
        -3.4458e-02,  2.5570e-02, -1.2165e-01,  1.3183e-01, -7.6214e-03,
         1.0174e-01,  1.1771e-02, -1.6389e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7688e-01,  2.4098e+01,  1.5263e-01,  7.9536e-02,  1.8829e-01,
         1.5805e-01,  2.2269e-01,  4.7748e-02, -7.4391e-02, -7.2292e-02,
        -3.6242e-02, -3.2270e-02,  1.2339e-02,  3.5645e-02, -4.9274e-02,
         4.4409e-01,  3.3884e-02, -1.0960e-01,  1.9153e-02,  4.7828e-02,
         1.7661e-01,  1.0836e-02, -5.3571e-03, -6.5664e-02, -2.5443e-01,
         2.8995e-02,  4.5021e-01, -9.8095e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7147e-02,  6.5717e+00, -4.0986e-02,  3.2098e-02, -2.4356e-02,
         2.4798e-03,  8.2906e-02,  7.7338e-03, -6.8092e-02,  5.0477e-03,
        -2.4733e-02,  5.5482e-03, -5.3948e-02,  1.2713e-02, -2.2496e-02,
         1.0875e-02, -8.3665e-03, -4.5785e-02, -2.1542e-02, -1.4112e-02,
         8.2900e-03,  8.9543e-04,  4.5194e-02, -5.3380e-02,  1.3985e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.2809e-01,  1.2929e+01,  1.9702e-01,  3.8169e-02, -1.6745e-02,
         1.0846e-01,  4.3634e-02, -1.3472e-02, -3.9448e-02,  2.1332e-02,
         1.1948e-01,  1.8510e-02, -1.5929e-02,  3.2548e-03,  8.5652e-03,
        -2.3838e-01,  2.5682e-02, -1.0178e-02,  1.8457e-03, -1.5643e-03,
         2.1123e-02,  1.1156e-02, -3.5270e-03, -2.2956e-02,  3.7391e-02,
         5.5702e-03,  2.0491e-02, -6.5077e-03,  4.4391e-03,  5.6199e-02,
        -7.5816e-03, -1.0714e-02, -7.6493e-02, -3.3853e-03,  5.5153e-03,
        -1.4783e-02, -1.7062e-02, -2.7593e-02,  1.3865e-03, -1.5574e-02,
         1.6900e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7191e-01,  7.5000e+00, -1.0110e-01, -3.4742e-02, -1.0502e-02,
        -3.6271e-02, -2.3623e-02,  1.6064e-02, -2.9550e-02,  5.2018e-03,
        -1.5381e-02, -1.3943e-02,  3.7574e-02, -2.8716e-02,  2.1054e-02,
         4.9157e-04,  1.0154e-02, -2.7462e-02,  7.9041e-03,  6.2503e-02,
        -7.0194e-03, -1.7586e-03,  1.7909e-03, -1.3906e-02,  1.1816e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3808e-01,  1.1414e+01,  1.3536e-01, -1.7742e-01, -2.4296e-02,
        -4.9662e-04, -5.9292e-02,  3.6571e-02, -5.8681e-03, -1.0462e-02,
         3.7719e-02,  6.6847e-02, -5.7041e-03,  3.1824e-03,  6.8511e-04,
        -1.6170e-02, -2.3876e-03,  6.7634e-03, -2.2162e-02,  7.3961e-02,
        -2.2579e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3459e-01,  7.6504e+00,  1.1333e-01, -7.2070e-02, -4.9246e-03,
         2.6731e-03, -1.4349e-02, -3.4676e-03, -2.6459e-02, -4.6102e-02,
        -1.2148e-02, -1.3604e-01,  5.3799e-02, -4.0525e-02, -8.1299e-02,
        -1.1769e-03,  5.0475e-02, -1.0227e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8415e-03,  1.4832e+00, -1.6478e-02,  1.3036e-02, -1.0919e-03,
         1.2396e-02,  4.2722e-03, -4.6316e-04, -7.7644e-04,  8.7150e-04,
         1.8883e-03, -1.9723e-04, -4.5821e-03,  5.1597e-03, -6.8738e-03,
         8.8798e-03,  2.7888e-03,  1.4160e-03,  5.3564e-03,  6.0067e-04,
         4.0532e-03, -7.7388e-04, -1.4248e-03, -2.8267e-03,  3.6016e-03,
         9.9899e-04,  1.5505e-03, -1.1041e-04,  2.0713e-04,  8.0810e-04,
        -3.6140e-03,  4.2774e-03, -6.4502e-04, -1.4636e-03,  1.7668e-03,
         5.6081e-03,  2.4657e-03, -3.5872e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3359e-02,  3.6241e+00, -9.7547e-02,  3.6129e-02,  1.3390e-02,
         6.3640e-03, -2.0502e-02,  2.7276e-02,  1.6245e-02, -7.3674e-03,
         5.6651e-03, -2.5958e-02, -2.4509e-03,  1.4572e-02,  1.7358e-02,
         4.8047e-03,  8.1324e-03, -4.5298e-02,  2.6585e-03, -4.9206e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0845e-02,  3.3302e+00, -6.2366e-03,  3.1012e-02,  9.8097e-03,
        -1.2598e-02,  2.6331e-02,  1.9969e-02,  1.0033e-02,  1.1505e-03,
        -2.2905e-02, -2.6907e-04, -2.6416e-02, -1.4692e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2292e-01,  4.1311e+00,  3.1942e-02,  1.1530e-02,  2.8486e-03,
        -1.6004e-03, -1.5237e-02, -8.9444e-03, -2.8545e-03,  2.1967e-03,
        -2.0391e-02,  3.8719e-03,  4.9443e-03,  2.3923e-02,  7.1884e-03,
        -1.0060e-02, -1.2251e-02,  2.1357e-03,  1.5504e-02,  1.2344e-02,
         4.6423e-03,  2.9614e-02, -4.6703e-03, -1.1220e-02, -1.9466e-03,
        -1.7273e-02,  1.0380e-02, -1.9703e-02, -1.9845e-02, -9.2234e-03,
        -6.6711e-03,  2.9948e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4582e-01,  6.5013e+01, -1.5307e-02,  7.3746e-01, -1.6400e+00,
        -1.8968e-01,  1.6712e-01, -4.1524e-01, -2.0952e-01, -3.1800e-01,
        -9.8697e-02, -2.9692e-01,  8.3971e-03,  1.0868e-02,  1.2940e-01,
        -1.4211e-01, -7.9044e-02, -1.3878e-01, -3.1446e-01, -1.3688e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5847e-03,  4.3371e-01,  2.0134e-03, -7.5939e-04,  1.7996e-03,
         5.5130e-04,  2.9575e-05, -9.7422e-04,  1.4872e-03, -6.4556e-04,
        -7.5127e-04,  1.3871e-04, -1.1577e-03, -3.7645e-04, -1.6647e-03,
        -7.5959e-04,  1.4154e-03, -6.0197e-05,  1.1585e-03, -9.7039e-04,
         4.0860e-04, -3.4916e-04, -5.2377e-04,  1.8063e-04, -1.4563e-04,
        -4.0450e-04, -2.0131e-03,  1.2207e-03, -3.0718e-04,  1.0082e-03,
        -1.8366e-04,  4.8746e-04, -1.8087e-04,  1.4136e-03, -6.3623e-04,
        -6.3916e-06,  1.3451e-05,  1.7835e-03,  9.6719e-04,  7.0596e-04,
         7.1294e-04, -2.8642e-04,  6.9042e-04, -1.2280e-03,  1.3018e-03,
         5.1944e-04,  6.7641e-04,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6582e-02,  7.5595e+00, -1.1305e-02,  4.9051e-02,  9.7678e-02,
         1.0393e-01, -1.0333e-02, -7.9066e-03,  1.0308e-02, -7.3015e-02,
        -2.4870e-02, -5.9211e-03, -1.0918e-02, -6.3559e-02, -4.7302e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9763e-02,  1.1566e+00,  2.4925e-03,  3.9674e-03,  4.1295e-04,
        -4.5734e-03,  2.0699e-03,  6.5703e-03,  1.6373e-03, -1.0677e-03,
        -2.2550e-03, -8.1613e-03, -2.6640e-03,  1.7421e-03, -2.1762e-03,
        -6.7011e-03, -5.3109e-04, -6.6545e-05,  6.7664e-04, -2.7028e-03,
        -1.8565e-03, -6.9599e-03,  1.0075e-03, -5.1718e-04, -9.8654e-04,
        -4.9909e-03,  2.4145e-03,  4.7523e-04, -3.7035e-03,  2.4721e-03,
        -5.7020e-04,  1.9347e-03,  1.6292e-04,  1.6141e-03,  8.4093e-04,
         4.5092e-03, -2.7676e-03, -1.5701e-04, -5.7368e-03, -1.4875e-03,
        -5.8223e-04, -4.8366e-04, -1.5085e-03, -3.8310e-05, -3.8978e-03,
         7.9472e-04, -5.0874e-03, -2.9130e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.9202e-02,  4.7686e+00, -9.5092e-03,  3.3178e-03,  5.0646e-02,
        -3.3934e-02,  2.0445e-02,  8.3918e-03,  5.4244e-03, -3.9123e-02,
        -8.5371e-03, -2.9293e-02, -5.4003e-03,  1.4129e-03,  4.1125e-03,
        -1.3017e-02, -3.8375e-02, -1.9609e-02, -3.4461e-03, -2.9483e-04,
         6.3667e-03, -8.8694e-03,  9.7828e-03, -2.6036e-02, -9.9855e-04,
         7.6922e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4983e-02,  7.1070e+00,  1.7112e-04,  2.5343e-02, -1.1746e-02,
        -4.2209e-02,  2.0977e-02,  5.1183e-02,  1.2562e-02, -5.2247e-03,
        -4.4285e-02, -1.6566e-03,  9.8237e-03, -3.0968e-02, -1.6379e-02,
         1.8766e-02,  3.2629e-02,  1.6281e-02,  9.5637e-02, -3.3026e-02,
        -1.8890e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1095e-02,  1.0314e+00, -2.3364e-03, -9.4895e-03, -6.3714e-03,
        -1.5177e-02, -4.5193e-05, -5.3760e-03, -1.6524e-03, -3.1334e-03,
        -3.1786e-04, -7.5451e-04, -3.6750e-03, -1.4169e-02, -5.2821e-04,
        -3.5197e-03,  1.4737e-03, -8.3087e-03,  3.4604e-03, -1.4716e-03,
        -3.1203e-03, -2.4188e-03,  1.5721e-03,  1.1993e-03,  2.7456e-03,
        -8.7459e-04,  8.2968e-04, -2.2678e-03,  2.3278e-04,  1.5874e-03,
        -6.0213e-03, -2.1341e-03,  3.8353e-03, -1.5585e-03, -8.5791e-03,
         3.0185e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5582e-03,  8.6056e-01, -2.4556e-03,  1.2325e-02,  2.5432e-03,
         6.5675e-03,  6.2146e-05,  1.1429e-02,  9.1971e-03,  2.4404e-04,
         1.2094e-02, -1.8923e-03,  8.9032e-04, -9.8363e-03, -6.5892e-03,
         9.2712e-03,  3.2480e-03, -3.4166e-04,  2.0829e-02, -2.1610e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3852e-03,  4.5461e+00,  1.8286e-02,  3.3250e-04,  2.5708e-02,
        -5.1253e-03, -2.6389e-02, -1.5112e-02, -7.8392e-03,  1.6006e-02,
         2.4604e-02, -3.0994e-02, -3.4769e-03, -9.5770e-04,  2.0407e-02,
         7.9455e-03,  2.8495e-03, -7.8146e-03,  4.0061e-03,  1.4665e-03,
        -2.0078e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6600e-02,  1.5564e+00, -1.3606e-03, -1.4566e-02, -1.7658e-02,
        -1.0975e-02, -5.3203e-03, -1.4504e-02, -1.1700e-03,  1.1342e-02,
         5.5639e-03,  1.3663e-02, -3.3800e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6274e-01,  7.3505e+00, -3.5745e-02,  4.1611e-05,  1.6880e-02,
        -1.5735e-02,  4.9397e-02,  3.8276e-03,  2.4257e-02, -5.3234e-03,
        -6.5987e-02, -8.4160e-02, -1.2516e-02, -2.8258e-03,  2.7471e-02,
        -2.5505e-02,  1.4181e-02, -1.5713e-02, -9.3432e-03, -9.2995e-03,
        -5.3110e-03,  1.2745e-02,  2.0962e-03, -8.6880e-03, -4.9581e-03,
        -8.4250e-04,  1.8773e-02, -2.4061e-02,  4.2067e-03, -1.2228e-02,
         1.3502e-02, -1.4339e-05, -5.9376e-03, -3.3258e-02,  1.0274e-02,
        -1.3083e-04,  1.8073e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6144e-03,  1.4862e+00,  5.3268e-02, -1.2817e-02,  1.6017e-03,
         2.0472e-03,  8.4218e-04,  3.4507e-03,  1.5456e-03,  2.8091e-03,
         1.5778e-03,  4.8523e-03,  1.0599e-02,  2.3546e-03, -2.4226e-03,
         4.1344e-03,  7.6392e-03,  1.5553e-03,  1.8560e-03, -3.2961e-03,
         1.6590e-03,  9.7742e-03,  1.3834e-04, -5.4990e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8727e-01,  3.1597e+00,  4.4291e-02,  2.8708e-02,  1.2906e-02,
         1.1820e-02,  3.1733e-03,  1.3851e-02,  2.6831e-02, -1.5140e-02,
        -1.9908e-02,  4.1206e-02,  1.5100e-03,  1.2148e-02,  2.4845e-02,
         1.0628e-02,  9.3013e-03,  2.1050e-03, -1.1220e-03, -1.0787e-02,
        -3.0684e-03,  8.3348e-03,  7.1741e-03, -3.2799e-03, -1.2750e-03,
        -5.1209e-03, -4.0188e-03, -1.0377e-02,  2.6074e-03,  6.5495e-03,
        -1.4502e-02, -3.1744e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4473e-02,  8.2965e+00,  1.5229e-01, -1.4399e-02, -8.3013e-02,
        -7.6632e-02, -1.5421e-02, -1.4205e-02,  2.7859e-02,  4.1793e-02,
         2.1856e-03,  6.8012e-03,  4.4878e-03, -1.3500e-03,  7.9263e-03,
         6.0850e-03,  2.5665e-02, -9.5528e-03,  2.9985e-02, -2.0903e-02,
         2.1036e-02,  3.3901e-03,  2.6504e-03, -1.5912e-02, -1.0390e-02,
         6.1971e-03,  2.6532e-02, -8.4995e-03,  3.4911e-02, -6.0091e-02,
        -2.4192e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9403e-02,  1.3120e+01,  2.2511e-02, -5.8390e-03,  2.9673e-02,
         7.6928e-02, -9.2471e-03, -4.7273e-01, -6.0575e-02, -6.4128e-02,
         5.9378e-02,  2.9635e-02,  9.8184e-02, -1.4158e-01, -8.6919e-03,
        -7.0719e-02,  1.9854e-03,  3.0183e-02,  3.8257e-02, -2.9810e-02,
        -6.9737e-02, -3.7598e-02, -3.6533e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3227e-02,  2.1494e+00,  3.7347e-02, -1.6947e-02, -1.6779e-02,
        -3.6089e-04, -3.8358e-03, -8.5776e-03,  1.5187e-02,  4.0545e-03,
        -4.4070e-03,  3.7907e-04, -4.9289e-03,  6.1199e-03,  8.9984e-04,
        -5.7507e-04, -8.3353e-03, -7.4108e-03,  8.1915e-03,  1.7593e-02,
        -8.8348e-03,  4.0405e-04,  8.6777e-03,  2.4453e-03, -2.3570e-03,
        -4.8657e-04, -1.4462e-03, -5.5546e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.2969e-01,  6.7027e+00, -2.4648e-03,  1.7430e-02,  3.5763e-03,
         2.4105e-02,  2.1420e-02, -4.9044e-02, -8.1712e-03, -3.7326e-02,
        -1.8571e-02,  9.2465e-04,  1.9840e-02,  7.9341e-03,  1.7107e-03,
         3.6395e-02, -8.4090e-03, -1.7104e-02,  1.8814e-03, -2.9326e-02,
        -1.7942e-03, -4.6348e-03, -3.3968e-02,  8.6948e-03, -2.6177e-03,
        -1.4066e-02, -2.0378e-04,  2.8560e-02, -9.2292e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7000e-02,  3.2160e+00, -1.5114e-03, -1.1544e-02, -1.0605e-02,
         1.7058e-02, -1.1462e-02,  1.0355e-02,  1.9346e-04,  2.5047e-03,
        -1.3576e-02, -3.8021e-03, -1.3644e-02,  4.2020e-03,  1.6223e-02,
         7.2263e-03,  3.6102e-03, -9.5701e-03, -4.5457e-03,  1.3664e-02,
         4.0132e-03,  4.3505e-04, -1.4040e-02,  1.0561e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8686e-01,  3.0474e+01,  2.7430e-01,  2.4501e-01,  9.7150e-02,
         9.6578e-02, -1.5094e-01,  3.9039e-02,  1.7071e-02,  4.0927e-03,
         6.5045e-02,  9.0438e-02, -8.2553e-03,  7.4279e-02, -2.0301e-02,
         1.0928e-01,  1.6388e-01,  2.6912e-02,  1.5545e-02,  3.9697e-02,
         1.5178e-01, -4.3678e-02,  8.6438e-03,  4.3140e-02,  2.0336e-01,
         2.3968e-03, -2.0701e-01,  8.5235e-02, -7.7253e-02,  1.3922e-02,
         5.4771e-02,  1.3121e-01, -7.8687e-02,  2.1107e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4671e-03,  1.6514e+01, -2.7101e-01,  6.2577e-02,  7.4833e-03,
         3.4328e-02,  8.1066e-02,  1.5866e-01,  1.0812e-01,  1.1055e-03,
         7.3113e-02,  7.3628e-02, -6.2819e-03,  5.8604e-02,  3.0340e-02,
        -1.0744e-02, -3.8989e-02,  1.5255e-02,  2.6971e-02,  6.4392e-02,
        -6.3323e-02, -1.1123e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9055e-02,  1.1394e+01, -3.2662e-02,  8.4041e-02,  2.5312e-02,
         6.6417e-02, -3.0675e-02, -2.2411e-02,  2.5769e-02,  2.9410e-02,
         2.0517e-01, -4.6657e-02,  1.1330e-02, -6.5304e-02,  2.0415e-02,
         5.5923e-02,  1.4169e-02,  4.9689e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0992e-03,  2.1479e+00, -2.2140e-02, -3.1015e-03,  3.2474e-03,
         2.1439e-02,  7.7036e-04, -6.0610e-03, -1.0410e-02,  7.6358e-03,
         1.4042e-03, -8.5197e-03, -3.1077e-03,  3.4464e-03,  2.2184e-03,
         3.1966e-03, -7.4859e-03, -8.1686e-03, -6.8043e-03, -4.6571e-03,
         4.6816e-03, -1.0730e-03,  3.6445e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9719e-02,  4.5195e+00, -6.0004e-02, -3.0541e-02, -3.9667e-02,
        -1.9252e-02,  1.0851e-02,  1.7451e-03, -1.5034e-02, -1.0725e-02,
         3.4641e-03,  3.7805e-04,  6.9707e-03, -3.7886e-03,  7.8396e-03,
         8.6634e-03,  2.0201e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0096e-01,  2.5832e+01, -3.8044e-01,  1.8782e-01,  2.0399e-01,
         2.5477e-03,  2.9576e-03, -5.6860e-02,  7.0964e-03, -1.4629e-02,
         5.4371e-02,  8.6735e-03, -1.0937e-01,  4.2041e-02, -4.2754e-02,
        -8.5066e-02,  8.2059e-02,  2.0407e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5764e-02,  2.9148e+01,  1.7098e-02,  7.6490e-02,  1.3106e-01,
         9.0412e-02, -5.5355e-02, -1.2006e-01, -7.8460e-03,  4.0265e-01,
        -1.0942e-01, -3.4373e-01,  1.0919e-01, -1.3574e-01,  6.0736e-02,
         2.9210e-01, -1.5010e-01,  9.7013e-03,  7.3067e-02, -3.3801e-02,
        -3.0169e-02,  2.6913e-03,  1.4414e-01, -6.9024e-03,  1.6150e-01,
         1.2177e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7369e-01,  1.2652e+01, -1.6397e-01,  2.1173e-02,  5.4158e-02,
         4.7783e-02,  1.3183e-03,  7.1766e-02,  5.9953e-02, -4.3902e-02,
        -4.7881e-02,  3.6388e-02,  4.1370e-03,  4.7666e-02,  8.3336e-03,
         1.6791e-02, -8.9887e-03, -3.2803e-02,  1.3886e-02, -8.0881e-03,
        -1.6686e-02, -2.2365e-02,  1.8263e-02,  2.4446e-02,  1.4695e-02,
         7.7520e-03, -1.2325e-02,  1.2335e-02,  1.8863e-04, -5.5494e-02,
         7.9428e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5274e-03,  7.6261e-01,  2.8468e-03,  3.5571e-03, -7.5685e-03,
        -3.7995e-03, -1.8130e-03, -3.0119e-03,  9.2846e-06, -2.0710e-03,
        -4.9090e-03, -5.5651e-03, -4.2457e-04, -4.3648e-03, -9.6060e-03,
        -2.5524e-03, -1.9186e-03,  8.7009e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0729,  2.0204, -0.0554,  0.0081,  0.0055,  0.0183, -0.0024,  0.0022,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-6.2905e-02,  2.6679e+00, -1.7594e-02, -1.1972e-02,  6.4162e-03,
        -8.5550e-03, -9.4161e-03, -8.5404e-03, -4.3066e-03, -2.6893e-03,
        -6.6736e-04, -1.9744e-03, -6.9833e-03, -1.1512e-02, -1.5108e-03,
         2.6801e-03, -7.8411e-03, -7.2535e-03,  4.3482e-04,  4.4001e-03,
        -2.7247e-03, -2.6951e-03, -1.2135e-02, -5.0781e-04, -2.4197e-03,
        -6.7331e-03,  4.8558e-03,  6.9909e-03, -1.8404e-03,  1.3670e-03,
         8.0901e-04, -2.9222e-03,  9.7676e-04,  4.8363e-03,  5.2863e-03,
         2.5753e-03, -8.8397e-03, -1.1588e-03,  6.6578e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8296e-02,  4.8175e+00, -5.3121e-02, -1.4221e-02,  3.2605e-03,
         1.1537e-02, -1.4333e-02,  5.5231e-03, -1.8219e-04,  3.8546e-03,
         8.7729e-04,  8.2359e-06, -1.2344e-02, -1.5663e-02,  1.8273e-02,
         7.0688e-03,  7.3130e-03, -1.0286e-02,  1.0137e-02,  1.0891e-03,
         1.7030e-02,  5.5560e-04, -1.8773e-03, -3.4801e-03,  4.2664e-03,
        -3.2798e-03, -3.0134e-03, -5.6218e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4124e-02,  3.4152e+00,  3.2626e-02,  4.1791e-02,  3.3767e-03,
         1.2416e-02, -4.2394e-03,  5.2456e-03,  3.2687e-02,  1.3002e-05,
         1.7920e-02, -5.3350e-02, -5.4574e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3509e-01,  4.5489e+00, -9.6413e-04, -6.4750e-03, -2.2589e-02,
         3.3796e-03, -2.0392e-02,  3.9229e-02, -5.9850e-04, -4.0416e-03,
        -2.8455e-02, -1.5182e-02, -7.3638e-03,  1.5600e-02,  5.5918e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6064e-01,  6.5499e+01, -6.2045e-01,  1.7359e-01,  4.3259e-01,
         1.7581e-01,  4.3424e-01,  5.5451e-03,  1.1120e-02, -3.0167e-01,
         2.9503e-02,  4.7764e-02,  2.1151e-01, -7.1711e-02,  1.3558e-01,
         9.0619e-02,  1.0497e-01,  1.1804e-01, -1.7895e-01, -7.4930e-04,
         1.6706e-02, -1.0168e-01,  1.1345e-01,  2.0814e-01, -6.5520e-02,
         1.2087e-01, -1.4440e-01,  5.4154e-02, -1.6073e-01,  8.2103e-02,
         1.1602e-01, -6.2614e-02, -3.8689e-02,  1.7625e-01,  3.1334e-01,
         1.0534e-02, -4.9686e-02,  7.5876e-02, -1.0008e-01,  1.1639e-01,
         2.4827e-01,  1.2053e-02, -5.9773e-02,  8.9134e-02, -9.1001e-02,
        -1.3570e-01,  2.7294e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9059e-01,  2.9180e+01,  1.2883e-01,  1.8938e-01,  3.0514e-02,
         4.1302e-02,  1.2248e-01, -7.2742e-02, -7.7617e-02,  1.7294e-02,
        -1.8531e-02,  6.1452e-02, -3.2731e-03, -3.4326e-03,  5.8729e-03,
        -2.6846e-02,  3.8176e-02,  7.2973e-02, -6.0196e-02,  1.9452e-03,
         3.4816e-02,  9.5424e-02,  2.2106e-02, -6.3760e-02, -6.3313e-02,
        -1.0375e-01,  7.4067e-02,  3.0942e-02,  4.5573e-03,  1.4125e-01,
        -7.5430e-02,  7.1845e-03,  5.9403e-03,  1.8309e-02, -1.7110e-02,
        -7.1109e-03, -4.8434e-02, -1.9641e-02, -4.6241e-02, -1.2364e-02,
        -3.2888e-02,  8.0749e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9689e-03,  2.9631e+01,  1.5858e-01, -1.0384e+00,  8.0883e-02,
        -2.9257e-01,  7.1579e-01, -7.5061e-02, -5.8869e-02,  1.7758e-01,
        -4.6052e-02,  6.6008e-02, -6.7971e-02, -8.7334e-02,  1.7330e-01,
         1.6286e-01,  1.9373e-01, -5.8434e-02, -1.3355e-01, -4.5438e-03,
        -1.2045e-01,  4.8765e-02, -1.8927e-01, -9.5745e-02, -1.1444e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4699e-03,  5.5949e-01,  1.6519e-02,  6.9041e-05, -6.5155e-04,
        -3.1627e-03,  5.7174e-03, -8.1329e-04,  2.1715e-03, -5.0677e-04,
         7.6019e-04, -2.2574e-03,  1.7803e-03, -1.0922e-03,  2.1381e-03,
        -9.4021e-04,  1.2484e-03,  7.4924e-03, -1.9050e-03,  2.2052e-03,
         1.5278e-03,  1.4943e-03, -2.9426e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3774e-01,  5.8812e+01,  1.3245e+00, -2.1668e-02, -6.1052e-01,
        -2.6870e-01,  1.3132e-01, -1.4808e-01,  1.6797e-01,  8.7021e-01,
         2.2325e-01, -3.4795e-01,  2.0277e-01, -3.4432e-01,  1.3125e-02,
        -9.3595e-02,  4.6179e-01,  3.8359e-01,  1.1114e-01, -5.5455e-01,
         1.3515e-01, -1.9786e-01,  4.0243e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0130,  0.6614,  0.0122, -0.0084, -0.0057,  0.0066,  0.0045,  0.0110,
        -0.0012,  0.0108,  0.0043,  0.0050, -0.0051,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7989e-01,  5.4431e+00,  1.8930e-03, -1.0966e-02,  8.5611e-03,
        -8.2979e-03,  1.0762e-02,  1.8346e-02,  1.5639e-02,  1.6174e-02,
        -1.4750e-02, -3.0876e-02, -3.4063e-02,  7.4189e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8562e-02,  2.2132e+01,  3.8086e-01,  8.0184e-02,  7.8406e-02,
         3.4794e-02,  9.5074e-03, -5.2534e-03,  5.4385e-02,  2.0650e-01,
         6.1048e-02,  8.6842e-02, -9.4676e-02,  5.1319e-02,  4.8811e-02,
         3.8070e-02, -1.1386e-01,  7.4573e-02, -1.3256e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-9.6106e-02,  1.0707e+01,  9.0747e-02, -7.4324e-02,  6.3061e-04,
        -1.3135e-02,  5.7728e-03, -1.0523e-02, -2.7502e-02,  5.1599e-02,
        -1.7384e-02, -5.3040e-03,  9.4001e-03,  6.5402e-02, -2.7122e-02,
        -1.1892e-02,  3.7176e-02, -2.3875e-02,  2.9702e-02,  6.9423e-02,
        -1.4179e-02,  4.2935e-03, -4.9259e-02,  3.8223e-02, -4.6019e-02,
        -8.5613e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3806e-03,  2.4204e+00, -4.2018e-02, -7.9890e-03, -1.7171e-02,
        -3.5737e-02, -5.3934e-03, -1.3442e-02,  1.1822e-02, -3.1648e-02,
        -6.5355e-04, -8.6837e-03,  6.3016e-03,  1.5821e-02, -4.2270e-03,
         2.7525e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3659e-02,  1.1730e+00,  5.8119e-03,  3.7415e-02,  3.3642e-03,
         2.2538e-02,  8.2528e-04, -2.9975e-03,  7.9945e-03,  3.3709e-03,
        -1.1803e-02,  7.1137e-04,  1.1299e-03,  1.8974e-02,  1.9490e-03,
        -1.0119e-02, -5.9949e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2380e-02,  4.7237e+00, -7.9317e-02, -6.8229e-02, -1.0086e-01,
         3.9155e-02, -8.5197e-05,  5.8985e-02, -3.9989e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2826e-01,  1.1030e+01,  2.2886e-01, -1.6690e-02, -1.3835e-01,
         1.5193e-02,  5.6480e-02, -4.1476e-02,  4.1995e-02, -2.0041e-02,
         1.2220e-01, -7.2555e-02, -4.2260e-02,  9.4564e-03, -1.8912e-04,
         3.7616e-02,  3.7617e-02,  5.5056e-02,  1.6364e-02,  1.4190e-02,
         8.2182e-03,  2.5760e-02, -7.8618e-03, -1.7921e-02, -4.2958e-03,
        -5.5112e-02,  9.3556e-02,  2.8916e-03, -5.9143e-03, -2.5894e-02,
         4.3770e-02,  1.5224e-02, -6.4414e-04,  2.4150e-03, -2.6860e-02,
        -3.2880e-02, -2.8737e-02, -3.5253e-02, -6.1938e-02, -2.6061e-03,
        -5.5965e-02, -4.6093e-02, -1.4496e-02, -3.5828e-02,  2.8084e-02,
         3.6275e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2654e-03,  1.3965e+00, -7.4288e-04, -2.1761e-03,  3.1578e-04,
         3.9008e-03, -4.1975e-03, -5.4005e-05,  8.2662e-04,  4.6874e-03,
         8.9039e-04,  1.8100e-03,  1.1115e-03,  5.1356e-04, -1.3958e-03,
         9.8219e-04,  1.3763e-03,  2.1955e-03, -2.6010e-03,  2.7288e-03,
         4.4845e-03, -1.7423e-03, -6.9133e-04, -1.3994e-03,  2.4946e-03,
        -3.6538e-03,  2.3234e-03,  1.8020e-04,  4.9434e-03, -1.1341e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1224, 48.5820, -0.5053, -0.5946,  0.1129, -0.1362,  0.0706,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7529e-01,  1.6438e+01, -2.4088e-02,  1.1722e-01,  5.7672e-03,
         4.7476e-03,  9.1998e-02,  1.3259e-01,  1.3940e-01, -5.5249e-03,
        -3.4685e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2053e-02,  4.4391e+00,  4.1799e-02,  6.4395e-02,  9.1922e-03,
        -2.4594e-02,  2.2062e-03, -2.1456e-02,  5.7954e-02,  1.8429e-02,
         3.3584e-03,  3.3073e-02, -1.0163e-01, -1.2561e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7493e-03,  8.0580e-01, -6.0232e-03,  3.1112e-03,  1.8260e-03,
         3.1632e-06,  5.8152e-03, -5.9734e-04, -1.1084e-04,  1.7304e-03,
        -1.6491e-03,  8.4611e-04, -4.4877e-03, -1.0632e-02,  3.0562e-04,
         3.1070e-03,  1.9018e-03,  4.6666e-04,  8.8640e-04, -5.3491e-05,
         4.9371e-03,  2.2238e-03, -5.7531e-03, -1.8448e-03, -2.1355e-03,
        -1.3472e-03,  3.2630e-03,  6.5596e-04, -1.5170e-03, -3.1077e-04,
         2.5305e-03,  1.9534e-03, -1.0593e-03,  1.1061e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4985e-03,  3.3094e-01,  9.1512e-05,  3.9145e-03, -5.7470e-04,
        -1.6587e-06,  1.4715e-03, -3.1467e-03,  4.2159e-04,  4.7139e-04,
        -8.4609e-04, -4.1099e-04,  8.7066e-04,  1.0359e-03,  7.5293e-03,
         1.8779e-03, -6.1550e-03,  4.1345e-04,  8.9364e-04,  7.5433e-05,
         1.1723e-04,  1.5829e-03,  9.6271e-04, -1.0813e-03,  6.5471e-05,
         7.2541e-04,  7.6046e-04, -1.3747e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0488e-02,  7.5099e-01,  1.4355e-02,  1.4153e-03, -3.5587e-03,
         1.7659e-03, -3.3141e-03,  1.7403e-03, -2.9654e-03, -6.4941e-04,
        -1.4770e-03, -2.0847e-03, -2.8064e-03,  4.3213e-03,  1.4002e-03,
        -3.2523e-03,  5.5072e-03, -1.1958e-02, -1.0076e-03,  2.0516e-03,
         1.9112e-03,  2.4773e-03,  1.7415e-03,  2.7522e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 9.7026e-03,  2.0587e+00,  1.4934e-02,  1.6051e-03,  7.4726e-03,
         6.7353e-06,  2.3363e-02,  1.5154e-03,  2.4775e-03,  1.8972e-04,
        -3.2697e-04,  1.2736e-03,  2.7325e-03,  1.0666e-03,  2.2192e-03,
         3.6623e-03, -1.0092e-03,  2.4629e-03,  4.7676e-03,  3.0477e-03,
         7.6265e-03,  1.1454e-03, -9.1067e-04,  2.8416e-03,  1.1315e-03,
         1.6965e-02,  1.7088e-03, -5.2146e-03,  7.4507e-03, -1.5643e-02,
        -3.4960e-04,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0914,  4.1321, -0.0068, -0.0789, -0.0228, -0.0501, -0.0253,  0.0106,
         0.0354,  0.0199, -0.0446, -0.0849,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1074e-02,  1.0794e+00, -3.7853e-03, -2.3319e-03,  5.7253e-03,
        -1.0579e-02, -5.7379e-03, -4.5935e-04, -8.8049e-04,  5.0243e-03,
         1.1138e-02, -1.8485e-02, -7.3220e-03, -5.9387e-03,  5.8102e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2993e-01, -1.3161e+02, -2.3535e+00,  4.9415e-01, -1.3853e-01,
         1.5659e-01, -6.3212e-01,  2.4971e-01, -1.5193e-01,  8.9078e-02,
         6.4435e-02, -2.3323e-01, -6.0607e-01,  1.8981e-01,  4.4036e-02,
         1.0794e-01, -7.2803e-02,  3.3431e-01, -4.2361e-01,  2.8265e-01,
         2.0906e-02, -1.3059e-01,  6.9997e-01,  6.1403e-02, -8.2138e-02,
        -4.7189e-02, -2.2630e-02,  6.3316e-02,  4.0028e-01, -1.2004e-01,
        -1.4423e-01,  3.8885e-02,  1.8977e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1008e-03,  6.7820e-01, -1.1905e-02, -2.9888e-03,  4.0388e-03,
        -1.7489e-03, -3.7078e-03, -5.3096e-04, -1.6922e-03, -9.1851e-04,
        -5.8659e-03, -2.6335e-03, -4.2846e-03,  1.1293e-02,  7.0871e-04,
         5.7603e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7683e-01,  2.8029e+01,  1.1390e-01, -9.2559e-02, -1.6072e-02,
        -4.5211e-01,  1.3783e-01,  2.3416e-01, -1.8321e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7500e-03,  9.0159e-02,  2.4289e-03, -1.2431e-03, -7.1553e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4244e-02,  3.0292e+00,  4.4955e-02,  2.3930e-02, -7.6167e-04,
        -2.1469e-02, -4.9031e-04, -1.4930e-03,  5.4710e-03, -7.1842e-03,
         1.4292e-02, -1.0888e-02, -1.2580e-02, -2.9172e-03, -1.6998e-03,
        -1.8563e-02, -7.8953e-03,  1.1673e-03, -1.2059e-03,  5.3311e-03,
        -6.9838e-03,  2.6222e-03, -1.4777e-03,  1.1975e-02,  1.2279e-03,
        -1.2178e-02, -4.0667e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1062e-01,  1.5377e+01,  3.8351e-01, -9.0026e-03, -1.7873e-03,
         8.9219e-02,  1.2373e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3657e-02,  1.2872e+01, -4.9996e-01,  3.4352e-02,  1.6425e-02,
         2.5639e-02, -5.6615e-02,  2.3592e-03, -1.1227e-02, -1.0764e-01,
         2.5563e-02, -1.6738e-01, -1.1695e-02, -8.6842e-03, -5.5777e-02,
        -2.7047e-02,  1.0121e-02, -7.8094e-02, -8.3559e-03,  1.1046e-02,
        -1.7538e-02, -4.8393e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8472e-02,  7.9679e+00,  1.9096e-02, -4.5239e-03, -1.1675e-02,
         6.1564e-02, -3.8933e-02, -8.1028e-03, -4.1820e-02, -2.0873e-02,
        -1.0382e-01,  4.8291e-02, -3.6823e-02,  5.9301e-02,  4.5842e-03,
        -1.6090e-02,  2.2965e-03,  1.4510e-02,  4.1383e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5436e-01,  3.0906e+01,  1.3552e-01,  1.9394e-01,  5.2993e-02,
         1.2839e-01, -2.8606e-02, -8.3123e-02,  1.0322e-01,  6.5922e-02,
        -7.1208e-02, -6.0987e-02, -1.9221e-04,  1.6102e-02, -1.1496e-01,
        -5.9981e-02, -2.4605e-01, -8.6854e-02, -1.0151e-02,  1.4203e-01,
        -3.8040e-02,  6.6027e-02,  2.8626e-02,  1.9255e-03, -9.6978e-03,
         9.1695e-04, -3.9439e-02, -2.7317e-02,  8.0616e-03,  2.6807e-02,
         2.7182e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-1.2099e+00, -7.5805e+01,  8.7678e-01,  3.0331e-01,  4.9145e-01,
         3.2556e-01,  1.8534e-01,  7.9514e-02, -2.1053e-01, -9.7690e-02,
         2.5557e-02,  9.3438e-02,  2.1543e-01, -7.7623e-01,  5.3530e-03,
        -1.0824e-01, -1.6364e-01,  3.9916e-02,  8.3899e-02,  5.8123e-02,
         8.0937e-02,  1.3920e-01, -6.1447e-02, -3.7862e-02, -1.9585e-01,
         2.8556e-01,  9.5497e-02,  2.7315e-01, -2.0222e-02,  9.4677e-02,
        -3.0504e-01,  4.1310e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3852e-01,  1.3983e+01, -4.0198e-02,  1.1709e-02,  1.8062e-02,
         1.4733e-02,  4.4435e-05,  2.9576e-03,  2.5344e-01, -1.7539e-02,
         6.9426e-04,  4.1106e-02,  1.0657e-03,  1.1946e-02, -4.2531e-04,
         9.5137e-02,  3.1974e-02,  3.1278e-02, -8.1269e-02,  2.3982e-02,
         7.2984e-02,  3.5578e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2267e-02,  1.5738e+01,  5.9089e-02,  1.0531e-02,  5.8327e-02,
         5.0348e-02, -3.9789e-02, -1.3101e-02,  8.9435e-02,  8.5013e-03,
         2.2217e-02,  2.2423e-04, -3.4170e-02, -9.9841e-03, -9.8992e-02,
         1.1185e-01, -4.1234e-02,  2.0048e-01, -2.3066e-02,  3.1484e-02,
        -6.0018e-02, -5.7246e-02,  4.5571e-03,  1.3070e-01, -9.7438e-03,
         3.5873e-03, -7.1831e-03, -9.0030e-02,  5.8753e-02,  5.7152e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9002e-02,  2.3202e+00, -2.6838e-02, -1.5215e-03, -4.0232e-03,
         2.8227e-03,  1.7920e-03,  3.1065e-03, -7.8950e-04, -5.5689e-03,
        -1.0825e-02,  3.0164e-03, -6.7742e-03, -3.6471e-03, -1.0704e-02,
        -2.1043e-03, -1.4841e-03, -2.6703e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7767e-01,  1.1630e+01,  1.9771e-01, -1.8813e-02,  1.6583e-02,
        -7.5804e-02, -8.8252e-03,  4.6994e-02, -3.7010e-02,  2.8531e-02,
         2.8828e-03, -1.6578e-02,  7.7828e-03,  1.5646e-02, -1.3818e-02,
         1.0890e-02,  9.4300e-02,  8.2171e-03, -1.5545e-02,  2.7142e-03,
         1.8992e-02, -4.7324e-02,  6.1112e-02, -1.5540e-02,  5.5484e-03,
        -2.2963e-02, -1.4692e-02, -3.5833e-03,  3.1345e-02,  3.8194e-03,
         1.8960e-03,  2.1480e-02, -4.7815e-03, -2.9700e-02, -7.3227e-03,
         3.0747e-03,  9.7546e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7283e-02,  8.7737e-01,  1.2242e-02,  2.0150e-02, -6.4871e-04,
        -1.0945e-03, -5.4595e-04, -5.1954e-03, -1.3454e-03, -1.0956e-03,
         1.4394e-04,  1.3314e-03, -9.4883e-03, -1.6848e-03,  5.0945e-03,
        -1.8717e-03, -9.9208e-03, -1.5023e-03, -4.9540e-04, -3.9536e-03,
        -2.0751e-03, -1.3045e-03,  2.1893e-03,  2.7428e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3253e-03,  1.0075e+01,  6.0251e-03,  2.9277e-03, -2.2298e-02,
         1.4011e-02, -9.7644e-03,  3.9014e-02,  3.5609e-02, -2.9928e-02,
         7.5269e-03, -7.8108e-04,  8.6593e-03,  1.2858e-02, -1.2789e-02,
         8.4226e-03,  6.5764e-03, -1.6733e-02,  6.5850e-03, -2.1899e-02,
         1.4267e-03, -1.5054e-02,  1.2157e-03,  1.2635e-02,  8.6023e-05,
        -1.0585e-02, -1.5786e-02, -1.9587e-02, -2.5658e-03, -1.7987e-02,
        -5.3578e-02, -1.9857e-02,  2.5254e-03,  1.4275e-02,  5.0522e-03,
         9.4392e-03,  1.4061e-03, -7.9200e-03,  4.4277e-03, -1.5975e-03,
         3.6944e-03,  1.3080e-02,  2.1912e-03,  2.0856e-03, -1.6910e-03,
         5.4107e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3222e-03,  1.3240e+00, -3.0689e-02, -5.7432e-04, -1.1456e-02,
         4.2625e-04, -3.2929e-03, -2.3918e-03,  1.7327e-03, -2.2651e-02,
         4.8713e-03, -5.5558e-03, -2.7251e-03, -2.0863e-03,  3.1467e-03,
        -8.5037e-04,  3.4207e-03, -3.2292e-03, -8.5642e-03,  2.0834e-03,
         1.0618e-02, -4.2001e-03,  1.1727e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5304e-02,  5.4896e+00,  7.7706e-02, -2.1584e-02,  3.5558e-02,
         8.4718e-03,  2.9092e-04,  2.1451e-02,  4.1958e-02, -3.0648e-02,
         1.2873e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8048e-01,  6.7071e+00,  6.6778e-02,  4.8773e-02,  5.1009e-02,
        -1.6589e-02,  3.2834e-02,  6.3785e-02,  4.0517e-02,  2.9421e-02,
         3.3931e-03, -2.2068e-03, -5.8546e-02, -5.7323e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2179e-02,  2.2829e+00, -2.0789e-02, -5.0689e-02, -2.7628e-03,
        -1.8898e-03, -2.8171e-02,  9.1878e-04,  2.2182e-03, -4.3359e-02,
        -2.5307e-02,  4.6744e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0373e-02,  2.5272e+00,  5.4859e-04,  1.8427e-02,  8.4566e-03,
         6.1435e-03, -1.0947e-02,  1.5198e-03,  6.8081e-03,  9.6728e-03,
        -2.7497e-03,  5.8791e-03, -1.2464e-03,  6.1458e-03, -2.1990e-03,
        -3.7217e-03, -1.8827e-03,  2.4252e-03,  1.8939e-03, -8.5904e-03,
        -2.4039e-03,  3.0841e-04,  3.6406e-03,  6.8256e-03,  1.7476e-03,
         1.0001e-02,  3.2525e-02, -6.3460e-03, -9.2357e-03,  2.9517e-03,
         3.7662e-04,  1.4538e-02, -5.7030e-04, -6.8592e-04,  2.1944e-03,
        -5.8396e-03, -2.3120e-03,  2.0178e-03, -1.3005e-03, -6.3143e-03,
        -3.8033e-03,  1.8501e-03,  4.4173e-03, -3.1453e-03,  4.7990e-03,
        -2.8407e-03,  8.2328e-03, -8.5521e-04,  1.9304e-03, -2.5841e-03,
         2.7709e-03,  1.8572e-03, -2.9034e-03,  5.8723e-03,  9.1077e-03],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.3658e-01,  9.5904e+01,  3.0419e-01, -3.8815e-01,  2.9407e-01,
         7.8516e-02,  5.8511e-01,  1.6079e-01,  7.6533e-02, -2.2457e-01,
        -1.3376e-01, -1.7819e-01,  1.1660e-01, -1.1764e-01, -9.5162e-02,
        -7.3822e-02,  2.0268e-03,  6.4955e-01,  9.0774e-02, -1.8158e-01,
         3.6687e-02, -5.5048e-01, -1.8761e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.6020e-04,  1.5992e+00,  4.0183e-03,  4.2847e-03,  3.5592e-03,
        -4.0393e-03, -1.3421e-03,  6.1212e-03, -3.8309e-03, -1.1027e-03,
        -1.2508e-03,  5.6807e-04, -7.1483e-03,  1.1271e-02, -8.4612e-04,
         8.5036e-05, -7.1287e-03,  1.2666e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0556,  7.7949, -0.1392, -0.0319,  0.0692, -0.0700,  0.0411, -0.0108,
         0.0328, -0.0922, -0.0438, -0.0196,  0.0797, -0.0814, -0.0314, -0.0652,
        -0.0276, -0.0505,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1273e-02,  3.5409e+00, -4.0615e-02, -1.3983e-03,  1.3542e-02,
         1.8852e-02, -1.4162e-02, -1.6567e-02, -7.5918e-04, -2.9801e-03,
         9.0774e-03,  3.7177e-02,  2.7269e-02,  3.5130e-02, -1.0972e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1806e-02,  4.2451e+00,  1.4089e-02,  4.1247e-02, -6.6910e-03,
        -1.1441e-02, -1.4869e-02,  1.0049e-02,  2.5128e-02,  2.6170e-03,
        -8.5316e-03, -1.2341e-02,  1.2723e-03, -6.9441e-03, -1.1162e-02,
         3.0944e-02,  4.7323e-03, -9.2291e-03, -1.3646e-02,  2.5995e-02,
         3.3586e-02,  1.0522e-02,  3.1679e-03,  1.9044e-03,  6.1407e-02,
         6.4402e-03,  1.5080e-02, -3.0635e-03,  3.5994e-03,  1.5184e-02,
        -1.9267e-02, -9.1937e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0191e-01,  1.4389e+01,  1.7890e-03, -1.9569e-01, -3.0234e-02,
         4.5571e-03, -2.0620e-02, -5.9719e-02,  1.8993e-02,  6.0335e-02,
         7.4453e-02, -6.4677e-02,  7.2375e-02,  3.5413e-02, -2.2894e-02,
         4.5221e-02,  1.9599e-02,  3.5520e-02, -1.5500e-02,  1.4086e-02,
         5.9802e-03,  1.0443e-01, -5.4031e-02, -5.8857e-02, -1.5879e-02,
         2.0073e-02,  8.0994e-03,  2.1232e-02, -2.0366e-02,  5.8365e-02,
         1.2313e-03,  5.1624e-02,  5.4395e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4826e-02,  7.6385e+00, -1.9482e-02,  1.6015e-02, -9.2743e-03,
        -1.7953e-03,  7.8408e-03, -4.2236e-03,  3.5552e-02,  3.8162e-02,
         4.3491e-03, -3.3728e-02, -4.3681e-02, -3.4272e-02, -6.0104e-03,
        -1.1000e-02,  2.3362e-02, -1.1580e-02, -2.6542e-02, -8.2602e-04,
        -2.4843e-03, -3.8434e-02, -1.3741e-02,  1.4136e-02,  2.6660e-04,
         1.1123e-02,  4.8307e-03, -3.2985e-02, -9.6571e-03, -9.1664e-03,
         1.7155e-03,  8.7502e-03, -1.4948e-03, -6.0947e-03,  1.6697e-03,
        -1.9033e-02, -1.0041e-02, -5.4456e-03,  2.9887e-03,  7.3818e-03,
         5.6620e-03, -2.0680e-02, -6.8120e-03,  6.3437e-03, -6.9936e-03,
        -4.1462e-03, -9.6062e-04,  1.2157e-02,  3.3768e-03,  1.6786e-02,
         6.5570e-03,  1.9866e-04, -1.1870e-03, -1.1397e-04, -8.8478e-03,
         9.4481e-03, -2.5533e-02,  6.7765e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0016e-03,  3.9256e+00, -5.6024e-02, -3.2618e-03, -8.3322e-03,
        -1.9263e-02,  5.9132e-03,  2.8852e-03,  1.0343e-02, -3.2650e-02,
         1.0396e-02,  1.9529e-02,  4.4066e-03,  2.9311e-02,  3.2035e-02,
        -2.5686e-03,  1.0269e-02,  4.0361e-04, -1.3121e-02, -3.0181e-02,
        -4.8369e-03, -1.7214e-03, -6.7206e-04, -2.6119e-02,  2.9098e-02,
        -5.4931e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6209e-02,  5.8344e-01, -2.0950e-02, -3.0225e-03, -4.0796e-03,
        -2.4031e-03, -2.7861e-04,  5.7996e-03,  3.1497e-03, -3.3620e-03,
         1.9096e-03, -1.5851e-03, -6.9080e-04, -2.5212e-03, -4.7613e-04,
         2.3454e-04, -1.1492e-03,  4.2616e-04,  1.3490e-03, -6.8028e-04,
        -1.2074e-03, -2.4282e-03, -2.3074e-04, -2.7312e-03, -4.2546e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8293e-01,  6.5384e+01,  9.0304e-01,  8.1264e-01,  7.1037e-01,
         3.4604e-02,  2.5589e-01, -8.3624e-04,  1.6417e-01, -1.3285e-01,
         7.2423e-02,  2.6167e-01,  1.1937e-01,  5.2091e-01,  8.7975e-03,
        -3.2761e-02, -9.7320e-02, -5.3889e-03,  1.0047e-01,  2.0373e-02,
         7.5802e-03,  4.1248e-01, -6.9672e-02,  1.5806e-01,  5.7484e-02,
        -3.7442e-02,  2.5576e-01,  7.2841e-02,  1.8093e-01,  2.9682e-01,
         2.6019e-01, -1.1738e-01,  2.5062e-02,  2.0007e-01, -8.5278e-02,
        -6.3911e-01,  1.3629e-01,  8.2686e-03, -1.6352e-01,  1.2520e-01,
         1.4086e-01,  8.8731e-02,  3.6010e-01,  1.5547e-01, -1.0091e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1927e-02,  1.6434e+00,  1.7193e-03, -1.5687e-03,  9.5585e-03,
         7.4183e-04, -2.9905e-03,  1.3664e-03,  1.0925e-03,  1.3982e-03,
        -3.4435e-03,  3.4094e-03,  3.1780e-03, -2.2825e-03,  6.7984e-03,
         5.4132e-03,  8.8678e-03,  5.8091e-03,  1.8291e-03,  4.8413e-03,
         5.1126e-03,  7.2365e-03, -1.7261e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1657,  2.6353, -0.0054,  0.0252, -0.0130,  0.0095,  0.0309,  0.0114,
        -0.0128,  0.0035, -0.0074, -0.0034,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.7333e-02,  1.8579e+00,  1.1054e-02, -1.1908e-02,  8.9297e-03,
        -4.4694e-03, -1.9087e-03, -3.6781e-03,  8.7478e-03, -6.4813e-03,
        -5.4833e-03, -8.3391e-04,  1.8485e-03, -7.4621e-03,  8.7073e-03,
         4.9832e-03,  1.4035e-03,  4.3660e-03, -2.1843e-03, -1.1416e-02,
        -9.0396e-05,  1.2237e-03, -3.7721e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6931e-02,  5.5004e+00, -3.4759e-02,  1.5584e-02, -1.2024e-02,
        -2.7513e-03, -1.2516e-02,  1.1200e-03, -1.6938e-02,  2.2363e-02,
         1.1787e-02, -1.4529e-02, -1.6649e-02,  8.5664e-03,  4.7147e-03,
         4.0135e-03, -2.7003e-02, -1.5526e-03, -1.3144e-02,  7.6363e-03,
        -1.4538e-03, -7.0806e-03, -3.3143e-02, -2.1343e-03, -1.3605e-02,
        -2.0296e-03, -1.2913e-02,  4.8709e-03, -8.1794e-03,  7.6626e-04,
         1.3696e-03,  5.0949e-02, -1.0341e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1731e-02,  5.3913e+00,  4.9551e-03, -2.8715e-02,  1.2762e-02,
        -1.2005e-02, -1.1337e-01,  6.9386e-03, -1.2405e-03,  2.0700e-02,
         1.4668e-02, -3.5681e-02,  5.5492e-03, -2.1194e-02, -1.6154e-02,
        -4.1229e-02,  1.9398e-05,  2.5484e-02,  2.7750e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0027,  0.8763, -0.0192,  0.0062, -0.0079,  0.0078,  0.0121,  0.0025,
         0.0061, -0.0024, -0.0011, -0.0012,  0.0076, -0.0072,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4340e-01,  3.0614e+01,  7.6496e-02, -1.3741e-01, -7.2800e-02,
         3.0568e-03,  7.0928e-02, -6.5475e-02,  2.6210e-01, -4.4782e-02,
         4.1863e-02, -4.8153e-02,  7.7641e-02, -1.4139e-01, -9.4958e-03,
         1.0940e-02, -1.1554e-02, -2.8981e-02, -3.8084e-02, -1.1931e-01,
        -3.9090e-02, -9.6099e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3876e-02,  1.6571e+01, -1.8097e-01, -4.5910e-02,  6.5000e-03,
         3.0001e-02,  7.5845e-02, -1.0253e-01, -5.6589e-02,  3.2993e-02,
        -1.7629e-02,  3.6867e-02, -7.3849e-02, -1.0062e-02, -1.9784e-02,
         3.8078e-02,  1.4987e-01,  8.6622e-02,  9.7734e-03,  7.5453e-02,
        -4.2329e-02, -6.5044e-03, -1.0611e-02, -2.1914e-02, -9.8393e-02,
        -2.0280e-02,  5.8426e-02, -2.4930e-02,  4.1289e-02,  6.5680e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5678e-02,  6.0924e+00, -5.3212e-02, -7.3068e-02, -1.5297e-02,
         7.4290e-02, -3.3600e-02, -3.3665e-02, -3.0567e-02,  6.0578e-03,
        -7.8741e-02, -1.4106e-02,  7.9953e-03, -4.1420e-02, -2.9575e-02,
        -7.5191e-02, -3.4705e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3986e-01,  2.4480e+00,  1.9630e-03, -3.9816e-03,  7.1248e-03,
         2.2973e-02, -5.1375e-03,  1.7393e-02, -1.9711e-02,  3.0203e-02,
         5.4909e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6269e-02,  1.5955e+01,  2.3305e-01,  5.0239e-02,  6.9765e-02,
         3.9914e-01,  2.0858e-02, -1.1183e-02,  8.8518e-03,  3.7715e-02,
        -2.4653e-02,  3.4055e-02,  4.2313e-02,  4.4207e-03,  8.8405e-02,
        -2.9074e-02,  3.9370e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3170e-02,  2.1780e+00, -1.7382e-01,  4.3779e-02, -6.7236e-03,
        -1.1757e-02, -2.0077e-02,  2.3796e-03,  3.6298e-03, -1.4201e-02,
        -1.2268e-03, -6.2037e-03, -7.0023e-05,  6.3001e-03,  1.5525e-02,
         1.1020e-03, -4.9381e-04, -5.2308e-03, -5.3755e-03, -2.5813e-03,
         5.5874e-05, -1.1244e-03,  8.3564e-03,  2.7872e-03,  8.8615e-03,
        -9.2436e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2164e-01,  3.3588e+00,  4.0634e-02, -6.1169e-03, -9.5596e-03,
         1.7632e-02,  9.5732e-03, -1.2279e-03, -5.1708e-03,  9.7012e-04,
        -3.1493e-02,  3.0400e-03, -5.3354e-03,  1.9894e-03, -6.2919e-03,
        -1.2844e-02, -3.9975e-03,  5.4642e-03, -3.8206e-03, -1.4888e-02,
        -6.3954e-03, -9.4639e-03, -8.5759e-03,  3.4598e-03,  1.9587e-03,
        -7.4796e-03,  4.3813e-03, -9.6680e-03, -5.0300e-03,  2.2274e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1270e-01,  2.5698e+01,  3.0225e-02, -4.4333e-03,  3.6545e-02,
        -3.8102e-02, -2.9950e-01,  6.8646e-02, -1.2860e-01, -6.4923e-02,
         4.0192e-02, -1.0697e-01,  1.5585e-03, -7.8741e-02,  1.7748e-02,
         1.4631e-02, -6.1660e-02,  1.5712e-01, -1.4596e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.3036e-01,  1.8394e+01, -2.9633e-01, -7.6803e-02, -2.0334e-02,
        -2.5237e-02,  3.6600e-01, -6.3375e-02, -1.2229e-02,  2.3149e-02,
         3.3798e-03,  6.7317e-02,  1.7747e-02,  1.4618e-01, -1.8151e-02,
         6.2365e-02,  1.2945e-01,  5.1707e-02, -1.9536e-03,  1.1293e-01,
        -5.2053e-02, -1.6610e-03, -8.4256e-04, -7.3606e-03, -1.5179e-02,
        -3.5508e-02, -1.1117e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7661e-02,  6.2400e+00,  1.8110e-02, -2.4667e-02,  4.7413e-02,
         3.1229e-02,  1.5267e-02,  6.8940e-02,  4.1486e-02,  1.8368e-02,
        -3.1946e-02,  8.6203e-02,  2.8117e-03, -5.6242e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1087e-02,  2.7936e+00, -9.1931e-03,  4.3529e-03, -6.3594e-04,
        -8.6636e-03,  5.7413e-03,  4.1984e-03,  8.5360e-03, -2.3698e-03,
         6.1175e-03,  6.1347e-03, -2.6014e-02,  6.7156e-03, -1.4000e-02,
        -7.2681e-03,  8.1202e-04, -5.7063e-03, -7.3612e-03, -7.7734e-03,
        -5.9177e-03, -3.6932e-03, -6.8867e-04,  3.2596e-03, -2.4486e-03,
         2.1467e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0587e-02,  1.8572e-01,  3.6963e-04, -1.1918e-03, -9.0571e-04,
        -2.5434e-03, -1.2798e-04,  4.4438e-04,  1.1683e-05,  3.2232e-03,
         6.4330e-04,  1.1974e-03, -2.0319e-04,  3.3474e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6224e-02,  4.7653e+00,  2.1202e-02, -2.0015e-02, -7.2583e-03,
        -9.1339e-04, -2.2666e-02, -2.0794e-02,  9.2302e-03, -1.4005e-02,
         4.7473e-03, -7.9179e-03,  1.3410e-02,  5.7205e-03,  1.3224e-02,
         8.8031e-03,  1.1945e-02,  7.5515e-03, -4.2241e-03, -1.4821e-02,
        -6.5766e-03, -6.2727e-04,  5.0707e-03,  6.3283e-03, -1.7595e-03,
         6.8941e-03, -2.2640e-02, -1.5486e-02, -3.5029e-03, -7.8220e-03,
         3.0393e-02,  1.0304e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2385e-01,  2.7512e+01,  1.5251e-01,  5.9146e-02, -1.3953e-02,
        -5.0801e-02, -1.2832e-01,  2.0938e-02,  5.6493e-02,  8.2201e-02,
         1.0896e-01,  1.8382e-01,  1.0648e-01, -1.9718e-03,  3.9416e-02,
         6.5341e-02,  1.2487e-01, -1.9497e-02,  7.5384e-02,  1.2295e-01,
         2.3208e-02,  2.8770e-02,  3.6782e-02, -6.8014e-02, -2.1043e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0033,  1.5790,  0.0135, -0.0026,  0.0083, -0.0061,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5289e-02,  2.6776e+00,  3.4615e-02,  8.1950e-05,  5.3998e-03,
        -5.7529e-03,  1.3488e-04, -8.4585e-03, -1.9585e-03, -3.1508e-03,
        -6.6187e-03,  1.4486e-02, -1.9960e-03,  1.1406e-02, -5.5129e-03,
        -2.2376e-03,  8.4492e-03,  4.2548e-03,  1.3551e-03, -3.4809e-03,
        -5.7740e-03,  6.3874e-03, -6.0028e-03, -6.5276e-04,  2.3674e-03,
         8.7893e-03, -2.8085e-03,  9.4955e-03, -4.1764e-03,  1.0740e-02,
         9.3692e-03, -3.0169e-03,  1.5307e-03, -3.5958e-03,  1.3920e-03,
        -6.0078e-03,  3.4899e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4171e-01,  1.0104e+02, -7.7183e-01, -4.9582e-01,  1.5366e-01,
        -4.7125e-01,  1.8275e-01,  1.2394e-01, -9.9524e-02, -7.2889e-02,
        -2.2470e-01,  1.0627e-01,  2.2833e-01,  2.6024e-01,  1.4688e-02,
        -3.2016e-01, -3.7351e-01,  9.2428e-01, -4.1429e-02,  4.9981e-01,
        -8.3927e-01, -9.7713e-02, -5.8257e-01,  1.0255e-01, -8.3912e-02,
         6.2115e-03,  2.4377e-02, -7.6445e-01, -1.3647e-01,  4.0496e-01,
        -2.1018e-01,  6.7488e-02, -7.6550e-02, -2.9472e-01, -1.8152e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3905e-01,  2.2059e+01, -1.9562e+00, -1.2513e-01, -1.3545e-01,
         1.7213e-02, -1.7009e-01, -1.1719e-01, -8.9270e-02,  4.7738e-01,
        -2.2467e-01, -6.5903e-02,  2.4618e-03,  6.8918e-03,  1.5419e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1053e-01,  8.8044e+00, -1.3603e-01,  3.7571e-02,  1.9492e-03,
         2.5311e-02,  1.5026e-02, -6.4166e-03, -6.0062e-02, -2.0622e-02,
        -3.9587e-02,  1.6194e-02, -1.4005e-02, -2.2071e-02,  9.5352e-03,
        -2.7329e-03, -1.7185e-02, -1.2723e-02, -1.0363e-02,  5.5499e-03,
        -2.7621e-04,  4.4158e-03, -3.4527e-02, -3.7958e-03,  5.2439e-03,
        -1.4485e-03,  2.6468e-03, -1.1538e-03, -2.3548e-03, -8.9218e-03,
        -1.6768e-03, -1.5716e-02,  1.7433e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0929e+00, -9.0652e+01,  3.5569e-01,  3.4576e-01,  5.0029e-01,
         1.9806e+00, -1.3430e-01,  1.3048e-01,  5.3382e-01, -6.7912e-02,
        -3.9661e-02,  2.6619e-01,  8.1934e-02, -3.8860e-01, -2.6220e-01,
        -1.0622e-01, -9.6816e-03,  2.9862e-01, -3.9308e-02,  9.0282e-02,
         2.0852e-01,  6.4593e-02, -3.0581e-02,  9.1279e-02, -1.3870e-01,
         3.5314e-01, -1.9070e-01, -4.3614e-01,  4.3041e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.9326e-01,  2.4007e+01, -3.4959e-01, -1.1758e-01, -2.5471e-01,
        -2.3239e-01, -2.0146e-02, -4.1491e-02, -1.2139e-02, -9.2673e-02,
        -1.1547e-01, -1.2987e-01,  9.1730e-02,  1.4596e-02, -1.1318e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7991e-02,  4.6325e+01,  2.1309e-01,  3.1895e-03, -8.1906e-02,
        -1.5223e-01,  9.4136e-02,  1.4260e-01,  3.7567e-02,  1.2571e-01,
        -4.2339e-01, -6.7303e-02, -1.1669e-02,  1.7562e-01,  2.2813e-03,
        -9.5779e-02, -4.5087e-02, -5.5218e-02, -7.4242e-02,  1.1335e-01,
        -9.5257e-03, -1.0089e-01, -7.9284e-02, -6.4949e-02,  1.9935e-02,
        -5.0516e-03, -1.6799e-01,  6.2911e-02, -1.7498e-01, -7.3886e-02,
         3.4457e-02, -1.6213e-02,  2.4491e-02,  4.1183e-02, -5.3811e-02,
        -7.2293e-02,  3.3602e-02, -7.1892e-03,  8.2025e-02, -7.3567e-02,
        -7.5817e-02,  1.1085e-01, -4.8945e-02, -3.1651e-02, -1.0921e-02,
        -1.7144e-01,  4.4875e-02, -2.7739e-01,  1.1984e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2930e-01,  9.7826e+01,  1.3275e+00,  3.8609e-01,  5.3777e-01,
         1.9102e-01,  3.6251e-01, -3.9165e-02,  8.7337e-01,  5.6489e-01,
         2.0100e-01,  1.2521e-01, -8.1302e-02,  2.7353e-02, -3.5934e-01,
        -3.5624e-02,  6.7470e-02,  1.6737e-01, -6.0651e-03, -6.1415e-02,
        -4.0577e-02,  2.2708e-01, -1.6734e-01, -1.2859e-03, -5.8156e-02,
         3.7960e-02, -4.4955e-01,  1.0905e+00,  8.2476e-02, -9.4220e-02,
         1.5204e-01,  1.5258e+00,  3.3093e-02, -1.2006e-01,  5.4313e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7512e-02,  1.1329e+01, -4.2284e-02,  5.5464e-02,  6.5746e-04,
         6.8281e-02, -9.2521e-02,  2.7072e-02,  9.6226e-03, -4.2416e-02,
        -2.0542e-02, -3.2576e-02, -2.8271e-02, -1.0760e-02,  1.1581e-01,
         1.1553e-02, -1.3705e-02, -1.4592e-02, -7.8408e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2182e-02,  9.4209e+00, -1.3342e-01, -2.7904e-02,  4.5519e-02,
         2.6263e-02,  6.6754e-03, -5.2721e-02,  3.2637e-04, -2.2270e-02,
        -3.9563e-02, -2.8285e-02,  1.3111e-02, -2.0092e-01, -3.1904e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2310e-01,  7.4468e+00,  2.4856e-03, -8.4525e-03, -1.8105e-02,
         3.2534e-02, -2.7402e-02,  4.7192e-03, -1.1033e-02, -9.3245e-03,
        -1.6851e-02,  1.5016e-02, -2.9623e-02, -1.1751e-02, -3.3441e-03,
        -3.5557e-02, -1.0113e-02, -5.2780e-03, -4.0918e-02, -7.4127e-03,
        -8.8433e-03,  2.4940e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1764e-02,  2.4239e+01, -4.1270e-01, -4.7049e-02, -1.5994e-02,
        -1.0705e-01,  2.0087e-02, -2.3660e-02, -1.8988e-01, -1.8807e-01,
        -1.4492e-02,  1.4385e-01, -1.2265e-01, -3.3078e-02,  5.1846e-02,
         1.1812e-02, -3.2284e-02, -1.1854e-01,  1.3809e-02,  2.5498e-02,
         1.7273e-02,  1.2410e-02, -2.2435e-02, -9.6445e-02,  3.2430e-02,
         9.2382e-03,  3.3296e-02, -6.3800e-02, -1.3871e-01,  2.1181e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0654,  1.8452, -0.0283, -0.0150, -0.0204, -0.0076, -0.0040, -0.0141,
        -0.0107,  0.0089, -0.0471,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6646e-02,  1.9461e+00,  1.2968e-02, -1.2074e-03,  9.9645e-03,
         2.4295e-03,  1.1960e-02, -8.0863e-03,  2.2493e-03, -2.3104e-03,
         1.5074e-02,  1.1830e-02, -1.8667e-03,  8.5075e-03,  4.9641e-03,
        -4.1297e-03, -5.2568e-04,  1.1105e-03, -2.6435e-03, -2.1696e-03,
         2.2649e-03, -1.7445e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2693e-01,  2.1753e+01,  1.8777e-01, -8.7592e-02, -9.1374e-02,
        -5.3259e-01, -1.9021e-02,  7.5419e-02, -6.8187e-02, -4.1586e-02,
        -1.6800e-01,  6.3564e-02,  5.4352e-02, -1.2055e-01, -7.4428e-02,
        -6.7586e-02, -5.0465e-03, -1.0679e-01,  1.0654e-02, -8.2496e-02,
        -2.5906e-02, -7.9671e-02, -1.2380e-01, -8.6250e-03,  2.2220e-02,
        -4.3054e-02, -5.8944e-03, -1.0411e-03, -1.5797e-01,  5.6665e-02,
         6.1419e-02,  3.4137e-02,  2.5790e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6445e+00,  1.0828e+02,  2.3351e+00, -1.0389e-01,  5.1468e-02,
        -8.8643e-02,  1.0478e-01, -9.2649e-01,  3.1864e-01, -5.6590e-02,
        -7.3084e-01,  5.3366e-02,  1.6167e-01,  1.0794e-01, -2.2904e-02,
         1.3506e+00,  4.2616e-01, -2.5126e-01, -3.0674e-02, -2.5511e-01,
         1.9456e-01, -4.2981e-01,  4.1591e-01, -1.9855e-02, -2.9248e-01,
         2.2924e-02,  1.9931e-01,  9.4842e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0911e-03,  1.3185e+01, -7.2975e-02, -1.4859e-02,  3.7547e-02,
         4.0960e-02,  2.8425e-02,  8.7918e-02, -1.5963e-01,  3.4057e-02,
        -1.5648e-01, -7.7114e-03, -1.5854e-02,  2.0654e-02, -1.6696e-02,
        -8.9488e-03,  1.9787e-02, -3.8272e-02,  1.1089e-02, -1.9902e-02,
        -1.6158e-02, -1.1412e-02,  1.6269e-02,  1.6502e-01,  2.2547e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 5.0012e-02,  8.8063e+00,  2.3161e-01,  2.3796e-02, -9.0653e-03,
        -2.3363e-03, -2.9730e-02,  1.6971e-02,  6.9009e-03, -1.2170e-02,
         1.3199e-02,  2.9118e-02, -1.7762e-02,  8.7684e-02, -6.8182e-03,
        -5.4641e-02, -5.7019e-03,  1.8573e-02, -1.2540e-02, -1.2951e-02,
         5.8486e-02,  1.0658e-02,  3.2090e-03,  3.8830e-03,  4.0458e-02,
        -5.7747e-03, -6.3265e-03, -2.8780e-02,  1.1137e-03,  6.8138e-03,
        -2.2902e-02,  1.1241e-02,  9.0953e-03,  1.3114e-02, -1.6509e-02,
         2.1009e-02, -3.4039e-03,  3.1266e-03, -1.5158e-02,  7.9821e-04,
         9.0988e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1110e-01,  1.5557e+01, -7.4433e-02, -1.4724e-01,  1.2906e-01,
        -1.2579e-01,  2.2181e-02,  7.8191e-02, -2.9979e-02, -1.4860e-02,
        -4.2500e-02, -6.6954e-02,  8.1567e-03, -3.0796e-02,  9.8275e-02,
        -6.0156e-02, -6.8006e-03,  3.6048e-02, -2.1509e-02, -1.5536e-02,
        -2.3710e-02, -1.1322e-01, -1.7597e-02, -2.4447e-02,  1.2010e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1352e-02,  9.7852e+00,  1.1606e-01, -6.3103e-02, -1.7484e-02,
        -3.4494e-03,  1.2751e-01, -4.1498e-02, -4.0958e-02, -2.4389e-03,
        -2.8595e-02,  3.6645e-02, -1.9847e-03, -4.3418e-03, -2.7563e-02,
        -1.0804e-02,  1.0073e-02,  1.0298e-02, -7.1496e-02,  4.3102e-02,
         4.0540e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1528e-02,  2.1396e+00, -8.2575e-04, -3.1175e-02, -2.1598e-02,
         5.7947e-03, -3.8788e-03,  1.5469e-03, -1.4425e-03, -1.1979e-02,
        -1.1640e-03, -2.2317e-02,  2.7728e-03, -1.9970e-02, -1.5100e-02,
         8.6621e-03,  4.6851e-03, -1.5454e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5353e-02,  4.0002e+00,  3.7781e-02, -2.5132e-02, -1.9140e-02,
         6.6914e-03, -3.5683e-03, -1.6212e-02, -9.8923e-03, -3.4199e-03,
         1.0809e-03, -9.4255e-03, -4.1309e-03, -1.0202e-02,  7.9635e-04,
         2.6945e-02, -1.7210e-02, -1.2913e-02, -6.9300e-03, -2.6225e-02,
        -7.1405e-03, -1.9032e-03,  9.7295e-04, -3.1903e-03,  8.8398e-03,
        -4.5036e-03, -5.5012e-03,  2.6294e-03, -6.3674e-03, -6.1653e-03,
         5.2481e-04,  2.0735e-03,  3.5255e-04, -4.1174e-04,  2.0225e-02,
         9.2638e-03, -8.7123e-03, -1.2338e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6576e-03,  6.1882e+00, -6.8753e-02, -1.5762e-02, -4.5574e-03,
         2.7699e-02, -1.4836e-02, -2.7129e-03, -1.7190e-02, -8.0547e-03,
         4.8132e-03, -1.8502e-02, -1.3356e-02, -5.2132e-02, -3.4650e-02,
        -2.0921e-02,  6.9259e-03, -4.0545e-02,  2.1124e-03, -2.9432e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2660, 22.1393, -0.1400, -0.0438,  0.2142, -0.2265,  0.0798,  0.2480,
         0.0425, -0.0505,  0.0860, -0.0252, -0.0459,  0.2324,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2432e-01,  3.2403e+01,  4.0948e-02,  5.9600e-02,  9.0907e-02,
        -7.1812e-02, -6.4254e-02,  9.2567e-02,  1.0638e-01,  4.6724e-02,
        -1.1183e-01,  1.6560e-01,  5.4913e-02,  2.3327e-01,  1.2475e-01,
         1.1175e-01,  1.0576e-01,  1.2184e-03, -3.1627e-02,  1.2146e-01,
        -1.1473e-01,  4.3512e-01,  5.9287e-02,  1.2118e-01, -7.1717e-03,
         1.5536e-02,  4.4167e-02, -6.3455e-02, -3.2474e-02, -1.0268e-01,
        -1.1250e-01,  2.8189e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7646e-01,  8.7338e+01, -3.9535e-01, -4.8079e-02, -8.3608e-01,
        -6.0592e-01,  1.7194e-01, -1.3291e+00, -3.3236e-01, -2.4104e-01,
         7.0258e-02,  7.6700e-02,  1.8195e-01,  4.1475e-02, -1.0542e-01,
        -1.6253e-01, -1.6792e-01, -1.6361e-01, -3.7276e-01, -4.8734e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7207e-02,  9.3801e-01,  5.0639e-03, -3.5568e-03,  4.4508e-03,
        -1.6649e-04,  2.2421e-03, -1.6165e-03,  7.5802e-04, -1.9269e-03,
        -1.0233e-04, -1.7332e-03,  1.2763e-04, -1.5404e-04,  8.0938e-04,
        -2.3323e-03,  1.1260e-03, -1.3104e-03, -2.4614e-03, -5.3001e-04,
         9.9760e-04, -1.6866e-03, -5.2490e-03, -1.4011e-03, -2.5278e-03,
         1.1859e-03, -4.1125e-03,  8.9191e-04, -1.5229e-03, -7.2254e-04,
         1.5134e-03,  1.0565e-03,  2.9204e-04,  1.1428e-03,  4.2141e-03,
         4.2688e-03, -2.3674e-03, -1.2310e-03, -5.2559e-04, -1.7322e-03,
        -3.9608e-03, -2.3859e-04, -1.8596e-04,  4.3154e-04, -8.3965e-04,
         5.4279e-03,  1.4916e-04,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1901e-02,  2.7069e+00, -2.1355e-02, -2.7878e-02,  3.5269e-02,
         1.0990e-03,  1.6802e-03, -5.7595e-03, -1.5784e-02, -1.6167e-02,
        -1.2300e-02, -1.0961e-02,  2.5338e-03,  2.5342e-02,  2.0031e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3095e-02,  2.6248e+00, -4.0597e-02, -5.1854e-03, -1.2541e-02,
        -1.6791e-02, -2.6724e-03, -1.7364e-02,  2.5245e-03, -8.6682e-03,
         5.5874e-04,  2.8655e-03, -9.7099e-04,  1.0416e-02, -5.6287e-03,
        -1.5459e-02,  1.4267e-03,  5.4968e-03,  2.2732e-03, -1.0241e-04,
         1.6441e-03, -1.0092e-02, -6.3447e-03,  4.3146e-03,  1.1602e-03,
        -5.7364e-03, -9.1135e-04,  1.9798e-02,  3.8443e-03,  1.3115e-03,
         5.8811e-03, -5.2849e-03, -1.0629e-03,  7.1536e-04,  3.5898e-03,
         3.3757e-03,  1.4459e-03, -2.4768e-03, -1.0364e-02,  1.4822e-03,
        -1.1483e-03, -2.9879e-03, -3.3271e-03, -1.3117e-03,  1.3860e-03,
        -5.0706e-03, -4.5621e-03,  1.6634e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-1.2122e-02,  4.8272e-01,  5.7619e-04, -7.5021e-04,  3.1850e-03,
        -1.3969e-03,  8.1780e-04, -9.5077e-04, -1.5239e-03, -6.2066e-03,
        -4.3213e-04,  7.4889e-04, -4.2279e-04,  3.5180e-04, -4.7810e-04,
        -7.6044e-04, -4.7557e-03, -1.8258e-03,  1.5515e-04,  5.6967e-04,
         2.1749e-04, -6.8773e-05, -1.5448e-03,  5.4445e-04, -5.9758e-05,
        -3.3704e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9681e-01,  1.8418e+01, -4.6751e-02, -5.0476e-02,  2.1838e-02,
        -4.2962e-02,  4.9304e-02,  1.2347e-01,  5.5403e-02, -3.4763e-02,
         1.5583e-01,  1.2950e-02, -9.2378e-03,  4.5502e-03,  1.9938e-02,
        -7.7436e-03,  1.1143e-01, -3.8432e-02, -3.4070e-02, -2.9510e-02,
        -1.2337e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5684e-03,  5.2429e-01,  7.0730e-03, -9.7136e-03, -2.7230e-03,
        -2.9496e-03,  7.0532e-04, -2.3410e-03,  4.0846e-04, -3.6279e-04,
        -2.3637e-04,  5.4688e-04, -1.1944e-03, -2.1905e-03,  1.6300e-03,
        -1.2974e-03,  3.4661e-03, -3.1718e-03,  1.0829e-03,  7.1665e-04,
        -1.1207e-03,  4.1938e-04, -7.5452e-04,  3.0312e-03,  3.1094e-03,
         6.1119e-05, -9.7881e-04,  4.2591e-06,  7.1584e-04,  4.6943e-04,
        -7.5234e-04, -8.4830e-04,  2.6016e-03, -5.1523e-04, -2.1693e-03,
         1.6954e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1550e-03,  4.5627e-01, -4.9357e-03,  6.9966e-04, -1.1404e-03,
         3.0172e-03, -2.4606e-03,  2.0524e-04, -1.3669e-03, -1.0137e-03,
         1.4819e-03, -1.8904e-04, -1.0204e-04, -6.7784e-04, -1.0805e-03,
         4.1858e-03,  2.0575e-03, -2.3980e-05,  1.2327e-03, -1.4594e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6586e-02,  3.1396e+00, -3.6064e-02,  1.2995e-02,  6.2630e-03,
         1.9241e-02, -4.2731e-03, -3.6515e-03, -3.3208e-03, -1.8949e-03,
         1.2323e-03, -6.7911e-03,  1.3991e-04,  1.0146e-02,  1.9985e-03,
         4.8855e-03, -3.9766e-03, -7.0482e-03, -1.7617e-02,  5.4479e-03,
         2.7914e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2414e-02,  9.5985e+00, -3.2400e-02, -4.3390e-02, -5.7799e-02,
         3.1077e-02, -9.9659e-03, -2.1688e-02,  5.8691e-02, -7.9472e-03,
        -3.7156e-02, -1.3959e-02, -1.2850e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6473e-02,  4.1286e+00, -2.0244e-02, -1.6892e-02, -1.3129e-02,
        -6.4498e-03,  6.6264e-04, -6.8465e-03, -1.2851e-02, -3.5132e-03,
        -2.2817e-02, -1.3879e-02, -2.1093e-02, -1.1476e-02, -3.5296e-04,
        -6.8068e-04, -4.1191e-04,  4.1168e-03, -1.5851e-02, -1.0826e-02,
         1.4782e-03,  1.4038e-02,  1.8905e-03, -3.5365e-03, -5.7643e-03,
        -2.2766e-04,  6.1734e-03, -1.9787e-02,  3.1849e-03,  4.2598e-03,
         5.9136e-03,  4.8335e-03,  1.2705e-02, -3.4392e-03, -4.5010e-04,
         1.3256e-02, -1.9716e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1431e-02,  4.7080e+00,  1.2354e-01, -2.3300e-02,  9.1797e-03,
        -1.4650e-04, -1.9831e-02,  1.8391e-02,  7.0206e-03, -1.2103e-03,
        -3.4288e-03,  1.2382e-03, -4.2642e-04,  1.4672e-02,  2.8418e-02,
        -1.6513e-02,  1.5560e-02, -2.0941e-03,  1.4823e-02,  8.9801e-03,
         9.9298e-03, -3.2663e-03,  7.8604e-03,  1.7394e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3121e-01,  1.5013e+01,  3.0572e-01,  8.5997e-02,  8.3460e-02,
         1.8015e-01,  7.1236e-03,  4.0046e-02,  1.3064e-01, -4.1889e-02,
        -3.7806e-02,  4.1779e-02,  7.3770e-02,  4.0418e-02,  1.2280e-01,
        -3.2311e-02,  4.4995e-02,  7.1696e-02,  3.5808e-03, -5.1475e-03,
         1.4120e-02, -9.6181e-02,  4.6960e-02, -3.0572e-02,  2.7213e-02,
        -1.7204e-02, -8.7633e-03, -3.1966e-02, -2.1367e-02,  1.7850e-02,
        -9.1055e-02, -7.9029e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3069e-01,  2.1798e+01,  3.6824e-02, -1.8638e-01,  1.1115e-02,
        -1.3159e-01,  6.9539e-02,  3.3378e-02,  1.8438e-01, -5.9898e-03,
         4.2238e-02, -2.8308e-03, -1.3518e-02,  2.8428e-02,  1.2264e-03,
         1.3113e-02,  5.8549e-02,  7.1028e-04, -2.9572e-03,  7.8204e-02,
         2.1753e-03, -8.8813e-02, -3.3015e-02,  8.1013e-03,  1.5714e-02,
         6.6657e-02,  1.3883e-02, -2.8813e-03,  2.8603e-02,  2.4609e-02,
         5.0134e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3059e-02,  3.3607e+01,  2.1346e-01,  5.1130e-02,  1.8596e-01,
         2.0047e-01,  1.8091e-01, -9.8446e-02,  3.1495e-01,  2.6027e-02,
         1.6937e-01,  9.8792e-02,  8.1963e-03, -6.8748e-02,  1.6285e-01,
        -2.4532e-02,  1.0328e-01,  8.9523e-02,  1.4333e-01, -1.0808e-02,
        -1.6059e-01, -3.5726e-01,  1.4882e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1915e-01,  2.6744e+01, -3.9816e-02, -1.3828e-01, -1.1207e-01,
        -1.3887e-02, -6.6718e-02, -9.4250e-02, -3.8667e-02, -1.0814e-01,
        -2.4407e-02, -1.3851e-01, -5.4006e-02, -1.2073e-01,  2.5818e-02,
         7.3647e-02, -6.2967e-02, -2.1808e-02, -7.3601e-03,  8.6803e-02,
        -1.2360e-01, -3.7318e-02,  2.3858e-02,  2.2411e-02, -7.6538e-03,
         5.1462e-02,  3.8387e-01,  2.2640e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-4.2465e-02,  8.9012e+00,  2.2783e-03,  1.5148e-02, -2.5573e-02,
        -6.2541e-03,  9.5390e-03, -1.0261e-01, -1.5821e-02,  1.6921e-04,
         1.6278e-02, -1.8159e-02,  5.8885e-03,  5.9818e-03,  9.4818e-03,
         2.0284e-02,  4.3535e-03, -1.2147e-03, -2.3565e-02, -2.8644e-02,
         3.3140e-03, -6.0312e-03, -1.2667e-02,  5.2032e-03,  3.6566e-04,
        -1.2230e-02,  2.1134e-02,  9.3405e-02, -1.5974e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3635e-01,  7.7395e+01,  1.0052e+00, -2.2271e-02,  1.1508e+00,
         1.4675e-01,  8.6325e-02, -2.5953e-01,  1.2669e-01, -2.2417e-01,
         3.3456e-02,  1.2531e-02, -1.3006e-01,  2.5441e-01,  5.0709e-01,
        -7.2871e-02,  3.3074e-01,  2.0802e-02, -1.5180e-01,  1.7781e-02,
         3.9101e-02, -3.3579e-01, -2.9149e-01, -5.6267e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3574e-01,  1.6664e+01, -2.6434e-01, -6.2125e-03, -4.5757e-02,
         3.8743e-02, -3.7550e-02,  1.8139e-02, -1.3782e-01,  2.4443e-02,
        -2.7976e-02, -5.1658e-02,  1.4833e-01, -2.7559e-02,  1.4225e-02,
         5.1937e-02, -7.1046e-03,  2.2207e-02,  6.2949e-03,  4.2939e-02,
        -3.6415e-02, -3.0249e-03,  4.0877e-03,  4.7940e-03,  1.5033e-02,
        -4.2378e-02, -9.4120e-02,  2.8760e-02, -2.0063e-02,  1.0338e-03,
        -1.2304e-02,  1.1769e-02, -4.6302e-03, -5.1387e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3603e-01,  8.2959e+01, -7.9597e-01, -1.6836e+00,  5.4884e-02,
         1.1443e-01, -8.0335e-02,  2.7590e-01, -8.8855e-03, -2.1326e-01,
        -4.7349e-01, -3.2628e-01, -4.8189e-02, -4.4298e-02, -4.6550e-02,
        -2.4410e-01, -3.8015e-01,  2.0709e-01, -2.3441e-01,  2.7063e-01,
         8.3939e-01,  1.1939e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7802e-01,  2.2151e+01, -1.6661e-01,  4.3820e-02,  2.5716e-03,
        -5.2666e-02,  1.6320e-02, -1.1668e-01,  4.0006e-02,  6.1902e-02,
         2.5001e-01, -3.1140e-02, -3.3437e-02,  9.1412e-03,  1.6176e-02,
        -1.0543e-01,  4.2324e-02,  1.9055e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3991e-01,  1.9050e+01,  5.8381e-02, -9.0948e-03,  4.0186e-02,
        -3.5487e-03,  5.7998e-03,  2.7486e-02,  7.1237e-02,  2.9421e-02,
        -1.1918e-02,  6.1928e-02, -2.4562e-02,  7.1254e-02, -1.5940e-02,
         5.2963e-02, -2.3566e-02, -2.4953e-04,  3.8286e-02, -5.4825e-02,
         7.4460e-02, -1.4179e-01, -4.0026e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2427e-02,  5.0302e+01, -1.6516e-02,  4.2505e-02, -4.7090e-01,
        -5.5205e-02, -7.8132e-02, -1.3050e-01, -3.0863e-01,  4.4109e-02,
        -1.6530e-02, -1.3701e-01, -5.5844e-02, -2.1098e-01,  3.5456e-02,
         3.7756e-01, -1.0254e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2523e-01,  6.2984e+01,  2.1136e-01,  2.3345e-01, -6.9169e-03,
        -1.1994e-01, -1.3880e-01, -6.7756e-02,  3.6553e-01,  3.1847e-01,
        -1.6364e-01, -3.0177e-01, -1.5509e-01,  9.2638e-02, -2.3469e-01,
         2.0376e-01, -5.9477e-01, -2.5505e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7611e-03,  5.8798e+00,  3.2494e-02, -5.0692e-02, -4.3076e-03,
        -4.3863e-03, -8.4270e-03, -2.0684e-02, -4.1198e-03,  9.3369e-02,
         2.9070e-02, -3.1657e-02,  7.3710e-03, -1.1876e-02,  1.3479e-03,
        -3.2586e-02, -2.3813e-02, -6.8012e-03,  2.2282e-02,  2.4853e-03,
        -1.5642e-02, -3.3016e-02,  2.0056e-02, -7.2648e-04,  9.6077e-03,
         1.5014e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5698e-01,  2.6640e+01,  1.1808e-01, -8.7180e-02, -4.8915e-03,
        -3.7333e-02,  9.6558e-02,  1.5104e-01,  8.4295e-02, -1.0944e-01,
        -3.4841e-04, -8.3014e-02, -3.5448e-02, -2.6779e-02, -4.2067e-02,
         3.1791e-02, -3.5442e-03,  4.5664e-02,  1.1061e-02,  1.7730e-03,
         7.4461e-02, -1.3444e-01, -1.1023e-01, -1.3987e-01, -2.7222e-02,
         2.1512e-02,  6.9201e-02,  7.3116e-02, -4.9782e-02, -7.9595e-03,
         1.9883e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9725e-02,  1.3358e+00,  3.9772e-03, -1.4997e-04, -2.6615e-02,
        -1.0414e-02, -2.1216e-03,  6.6227e-04,  4.0031e-03, -1.6253e-02,
        -1.0550e-02,  6.4725e-05, -1.0183e-02, -6.1818e-03, -9.5035e-03,
        -7.2045e-04, -1.6985e-02,  8.0950e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2419, 12.0252, -0.1275, -0.0471,  0.0447, -0.1266,  0.0261,  0.0439,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.1708e-02,  3.4931e+00,  2.3171e-02, -1.5315e-02,  4.1328e-03,
         9.4104e-03, -4.2451e-03, -1.1138e-02,  7.6040e-03,  2.1520e-02,
        -5.6013e-03, -4.2053e-03,  9.4888e-03,  5.8364e-03,  3.0138e-03,
         4.4665e-03,  6.1022e-03, -2.0628e-03,  3.9369e-03, -6.6266e-03,
        -1.2855e-04,  3.1761e-04, -9.3447e-03,  1.2207e-03,  2.6077e-03,
        -4.9161e-03,  6.1531e-03,  1.2239e-02,  1.8674e-02,  3.8119e-03,
        -1.0001e-02, -4.3363e-03,  9.3702e-03,  4.3784e-03,  1.2033e-02,
         2.3975e-03, -7.6267e-04,  6.0878e-03,  8.3599e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0355e-01,  2.2011e+01, -2.7328e-01, -9.8837e-02, -1.1470e-01,
         4.6509e-02,  1.1194e-01, -2.1036e-02,  7.3745e-03,  1.7769e-01,
        -1.8368e-02,  6.3872e-02,  2.6801e-01,  8.2337e-03, -8.9499e-03,
         3.0158e-02, -5.3270e-02, -1.1780e-02,  3.5718e-02,  5.4548e-02,
         1.5574e-01,  2.3948e-02, -2.8649e-02, -2.8085e-03,  1.4979e-02,
         3.1403e-02, -2.1607e-02, -1.4000e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4877e-01,  1.7336e+01,  2.3665e-02,  2.1292e-01,  4.1463e-03,
        -9.5400e-02,  3.3293e-02,  3.0591e-02,  4.7213e-02,  1.1001e-01,
         1.1273e-01, -2.4095e-01, -2.3658e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9702e-01,  5.0339e+01,  4.4433e-01,  1.5746e-01,  1.5883e-01,
         2.0988e-01, -1.4473e-01,  9.8516e-02,  8.7367e-03,  1.1651e-01,
         4.0479e-03, -1.0805e-01, -8.3036e-02,  1.6991e-01, -3.5688e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9006e-01,  5.3710e+01,  8.8789e-01, -1.2487e-01,  3.4609e-02,
         9.7629e-02,  3.7856e-01, -1.7367e-01,  2.3125e-01, -2.5606e-01,
        -9.8707e-02,  3.1702e-03, -1.5769e-01,  1.5598e-01,  1.4172e-01,
         1.1908e-01,  2.7344e-01,  5.1222e-02,  5.4464e-02,  1.1162e-01,
         1.3242e-01,  7.4668e-02,  1.1491e-01, -5.3640e-02,  1.5575e-01,
        -2.1517e-02,  1.6296e-01,  1.2743e-01, -7.4704e-02,  6.1599e-02,
         6.4811e-02,  3.2379e-02,  3.0211e-01,  2.5862e-01,  1.9704e-01,
         5.1890e-02, -1.8995e-03, -2.5963e-02, -1.4559e-02,  1.2229e-02,
         1.3007e-01, -1.0958e-01, -2.2867e-02,  5.0958e-02,  2.4512e-02,
         2.1192e-01, -3.5478e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8350e-03,  9.7049e+00,  9.4649e-02,  4.3602e-02, -3.0836e-02,
        -4.8488e-02,  2.8535e-02, -3.1386e-02,  1.0543e-02, -6.1044e-03,
        -1.0472e-02, -6.2601e-02, -8.7211e-03, -2.7312e-02, -1.4918e-02,
         1.0681e-02, -1.3274e-03,  6.0193e-02, -1.3278e-02,  2.0378e-03,
        -1.1125e-02, -8.8930e-03, -4.3099e-03, -1.4927e-02, -1.2625e-02,
        -1.1561e-02,  2.0158e-02, -1.3308e-02,  7.0117e-04,  8.3102e-03,
        -3.5410e-02, -1.2598e-02, -1.1619e-05, -5.3081e-03,  3.6857e-02,
        -1.5173e-02,  1.9992e-02, -7.5914e-03, -5.5090e-03, -8.7342e-03,
        -2.6842e-02,  4.3492e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0382e+00,  3.9332e+01,  2.4229e-01, -1.7043e-01, -7.9095e-02,
        -1.7685e-01,  7.9750e-01, -1.1087e-01, -1.4637e-01,  1.1929e-01,
         3.8392e-01,  3.1165e-01, -6.0206e-02,  6.0012e-02,  1.9031e-01,
        -1.1220e-01,  2.2298e-01, -6.0242e-02, -2.1276e-01, -2.7808e-02,
        -1.0434e-01, -5.2958e-02, -1.5559e-01, -6.0126e-02, -1.5475e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7483e-01,  1.1957e+01, -1.0099e-01,  3.8441e-03, -8.9918e-03,
        -7.1557e-02,  4.5957e-02, -1.4465e-02,  5.4690e-02,  2.1243e-02,
         2.6440e-02,  4.7296e-02,  2.2160e-02, -3.6808e-02,  4.6776e-02,
         1.9599e-02,  2.2425e-02,  1.0823e-01,  3.7872e-03, -1.8841e-02,
         1.3912e-02, -1.8942e-02, -5.2664e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9731e-01, -6.2660e+01, -7.9140e-01,  2.1014e-01, -9.0871e-02,
        -1.6553e-01, -3.3406e-02,  6.2052e-02,  7.4125e-01, -3.9424e-01,
        -8.6977e-02, -3.8779e-02, -2.6481e-01, -5.4316e-02,  5.3549e-01,
         2.2436e-01, -1.8582e-01, -3.3529e-01,  4.4126e-01,  2.9173e-01,
         3.2911e-01,  5.5396e-01,  5.9669e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5525e-03,  8.2087e-01,  8.4657e-05,  5.4972e-03, -2.2524e-03,
        -1.7232e-04,  2.2494e-03, -4.6834e-03,  4.6191e-03, -2.0785e-03,
        -1.9157e-04, -1.3450e-02,  1.0635e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0396e-03,  2.8649e+01, -3.9873e-01,  4.6797e-01,  4.3279e-02,
        -1.8064e-01,  8.8336e-02,  1.5906e-01, -7.0077e-03,  2.7228e-02,
         2.1973e-02,  1.6871e-01,  7.1967e-02, -5.8429e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5404e+00, -1.3472e+02, -1.0655e+00, -3.4723e-01, -1.5823e-01,
        -2.4870e-01,  3.7758e-01, -1.0515e-01, -1.7663e-01,  1.5327e-01,
        -4.6404e-01, -1.0120e-01,  6.6115e-01, -1.8312e-01, -2.5242e-01,
        -1.4458e-01,  1.0025e+00, -2.9562e-01, -1.0205e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.1629e-01,  9.8857e+00, -7.9798e-02, -3.1058e-02, -4.8240e-03,
        -2.6117e-02,  3.2142e-03, -9.4529e-03, -4.1861e-02, -2.5991e-02,
        -1.1473e-02,  1.6448e-02,  3.1994e-03,  5.8622e-02, -3.2447e-02,
        -2.1224e-02,  2.1636e-02, -1.0564e-03,  2.3186e-02,  1.1878e-01,
        -1.7389e-02,  8.3556e-03, -2.2833e-02, -6.1878e-03, -9.6551e-03,
        -2.6711e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0153e-02,  3.1482e+00, -1.4122e-02, -2.7112e-02, -3.1915e-02,
        -2.7109e-02, -1.2499e-02, -2.5298e-02,  3.4548e-02, -2.2056e-03,
         4.1071e-03, -6.6076e-03, -9.0762e-03,  1.0989e-02, -5.9760e-03,
         3.3475e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5404e-01,  6.2973e+00, -2.4777e-02, -2.5308e-02,  1.6466e-02,
         1.2318e-01,  4.4660e-03,  3.5671e-04, -1.2320e-02,  1.4302e-02,
        -1.7237e-02, -4.5160e-02, -1.2637e-02,  6.0523e-03, -1.8895e-02,
         3.6569e-02,  9.1217e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0105e+00, -1.0319e+02, -3.0847e+00, -6.1622e-01,  1.2131e+00,
        -7.7051e-01,  2.5389e-02, -2.8362e-01, -5.5772e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7465e-01,  2.4262e+01,  2.6799e-01, -3.6811e-01, -4.5047e-01,
        -1.5691e-01,  4.9207e-03,  7.5402e-02, -5.5349e-02, -5.6176e-02,
         3.8332e-02,  6.6208e-02,  6.7625e-02,  5.8763e-02, -3.1594e-02,
         2.6785e-02,  7.3878e-02, -4.5409e-02, -1.6640e-02,  5.0377e-02,
         1.2703e-01, -9.6837e-02,  7.8686e-03, -1.0074e-01, -9.7589e-02,
        -8.2070e-02, -1.2658e-01, -4.0930e-02, -1.8155e-02,  8.4353e-02,
         2.4972e-02,  1.1698e-02,  1.4580e-01, -3.1011e-04, -4.8990e-04,
        -3.3019e-02, -4.7640e-02,  6.9979e-02, -1.0511e-01,  1.3548e-02,
         3.5530e-02, -1.0595e-01, -6.4132e-02, -3.8056e-03,  8.0257e-02,
         2.0277e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3962e-02,  1.7644e+01,  3.1508e-02, -1.6696e-01, -1.5431e-03,
         1.4568e-01, -1.1661e-01,  5.3205e-02, -8.8086e-02,  1.6773e-02,
         1.3906e-02,  1.0838e-02, -2.2823e-01, -3.7229e-02, -2.0629e-02,
         5.4808e-02,  2.9637e-02, -3.8622e-02,  8.0270e-03,  6.5661e-02,
         3.2682e-02,  8.8017e-03, -6.7779e-02, -5.2136e-03, -1.3696e-02,
        -1.3099e-01, -7.9129e-02, -8.1302e-03,  6.9496e-02,  1.4963e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.9361, -33.3289,  -0.1920,  -0.1415,  -0.0653,   0.2336,  -0.2624,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7871e-01, -4.1912e+01, -2.0334e-01, -2.7822e-01, -2.2549e-03,
        -4.6025e-02, -1.1742e-01, -9.2452e-02, -1.1254e-01,  2.4264e-01,
         1.2729e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4137e-01,  1.1613e+01,  7.3966e-02,  1.0616e-01,  5.3106e-02,
         8.5822e-02,  5.7420e-02, -2.0541e-02,  2.8437e-02,  2.3903e-02,
        -1.1875e-02, -7.0719e-03, -2.5402e-01, -1.5325e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9880e-02,  3.4152e+01, -1.6765e-02,  1.0927e-01,  9.7409e-03,
        -1.1117e-02,  2.6567e-02, -4.2153e-02, -1.7552e-02,  1.6288e-01,
         7.6132e-03,  1.2789e-02, -5.8144e-02, -4.5395e-02, -4.1468e-02,
         8.9274e-02,  3.2881e-02, -3.3192e-02,  1.6348e-02,  1.0625e-01,
         2.3699e-01,  4.7642e-03, -2.0384e-01, -4.6477e-02, -6.5162e-02,
        -2.7337e-02,  9.7686e-02,  5.2554e-04, -1.5434e-02,  1.2997e-01,
         2.4171e-01, -5.0704e-02, -6.1013e-02, -4.1510e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0761e-02,  1.6170e+00,  1.3394e-02,  1.6470e-02, -1.6875e-03,
         1.8769e-04,  3.6955e-04, -8.4260e-03, -9.5305e-03,  1.8308e-05,
        -5.5876e-03,  2.5264e-04,  3.3143e-03,  3.9200e-03,  2.3038e-02,
         5.6245e-04, -8.6345e-03,  2.1242e-03, -1.3427e-03, -5.5935e-03,
         2.3379e-03, -4.3080e-03, -5.5226e-03, -1.0337e-03, -4.8460e-04,
         2.6431e-03, -7.2470e-04, -7.2924e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1559e-02,  2.8020e+01,  5.9483e-01, -1.6362e-01,  2.0792e-01,
         1.8315e-01, -2.0969e-03,  3.0877e-02, -6.7508e-03,  3.2305e-02,
        -2.3815e-02, -1.8478e-02, -3.5473e-02,  1.1520e-01, -2.5692e-02,
        -7.9235e-02,  1.1913e-01, -3.4160e-01,  2.4872e-02, -4.1696e-02,
        -9.6020e-02,  1.9166e-02, -1.9729e-01,  1.0972e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 3.5727e-01,  1.5586e+01,  1.0433e-01, -4.4764e-02, -2.4851e-02,
        -5.1092e-03,  2.2712e-03, -6.1681e-02,  4.2260e-02,  7.4524e-03,
         2.9796e-02, -3.4371e-02,  1.3884e-02, -2.4597e-02,  6.7796e-03,
        -3.0538e-02,  1.2219e-02,  3.2814e-02,  3.2981e-02, -1.2353e-02,
        -8.2199e-02, -3.6121e-03, -2.5960e-02, -4.3577e-03,  2.1536e-03,
         3.7917e-03, -2.5466e-02, -2.4487e-02, -2.3860e-02, -1.1524e-01,
        -5.6483e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1773e-01,  8.1844e+00, -5.3383e-02,  3.8388e-04,  1.8883e-02,
         2.9343e-02,  1.1621e-02,  2.4587e-02,  4.1019e-02, -1.5733e-02,
         3.9094e-02,  2.6887e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3147e-01,  1.4675e+01, -6.2454e-02, -7.2258e-02,  4.5039e-02,
        -1.6176e-01, -3.4840e-02, -2.9372e-04, -5.1273e-02, -1.0035e-01,
        -2.9963e-02,  7.5313e-02, -1.0435e-02,  1.5697e-04, -1.3203e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1744e-02,  1.4817e+01, -2.9196e-02, -6.2369e-02,  1.8296e-02,
        -5.0197e-02,  8.7794e-02, -7.6597e-03, -8.0594e-03, -9.9948e-03,
         1.3927e-02,  2.4945e-02, -3.0554e-03, -1.7553e-02, -2.2526e-02,
        -2.4606e-02,  5.6512e-03, -1.6422e-02,  4.2673e-02,  2.5664e-02,
        -6.4772e-04,  1.1765e-02, -7.0420e-02, -1.7693e-02,  1.2727e-02,
         2.7867e-02, -1.1801e-02, -2.2412e-02, -3.7611e-02, -3.0911e-02,
        -1.2824e-02, -3.3810e-02,  2.1680e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6191e-01,  3.3686e+01, -2.0873e-01, -5.3572e-01,  1.3700e-02,
        -1.0651e-01, -2.5726e-01, -1.8061e-01, -2.0306e-01, -2.0038e-01,
        -2.2245e-01, -1.1771e-01, -3.5995e-01,  2.1046e-01, -2.2707e-01,
         3.0509e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1275e-02,  5.7356e+00,  1.0160e-01,  2.1831e-02,  2.8261e-02,
        -4.5515e-03, -6.6662e-02, -5.1357e-02, -1.0545e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0533,  0.9833, -0.0184,  0.0124, -0.0431,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0986e+00,  1.3180e+02, -2.4713e+00, -8.6204e-01,  5.0534e-01,
        -1.5792e+00, -2.6060e-01, -2.6527e-01, -2.9461e-01,  2.2719e-01,
         2.5300e-01, -2.5555e-01, -9.8875e-01, -9.2603e-02,  2.0186e-01,
         3.3316e-01,  5.9092e-01, -4.6647e-02,  6.6372e-01,  1.5509e-01,
        -2.8778e-01,  5.2176e-01,  1.6257e-01,  4.0342e-01,  2.5218e-01,
         1.2516e+00, -4.6490e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6059, 35.1591, -0.0371,  0.1512,  0.5506,  0.1118,  0.6756,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6229e+00, -1.1464e+02, -2.8903e-01, -5.0580e-01, -2.6556e-01,
        -3.8251e-01, -5.9056e-02, -7.9547e-02,  1.8598e-02,  5.3675e-02,
        -4.3994e-02, -4.1649e-01,  7.7207e-02, -6.4241e-01,  4.0832e-01,
        -2.4115e-02,  5.7105e-02,  2.8863e-01, -7.4699e-01, -7.6482e-01,
        -7.6772e-02, -9.2232e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2483e-01,  4.7554e+00,  9.0170e-02,  3.6232e-03,  4.2901e-02,
         3.2476e-02,  5.6554e-03, -6.4450e-03, -1.4733e-02, -3.6329e-02,
        -3.2982e-02,  1.4882e-04, -2.2085e-02, -6.5016e-03, -6.7220e-03,
        -8.7765e-03, -6.2743e-03, -1.2854e-02, -3.4397e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8388e-01,  3.3991e+01,  1.7577e-01,  9.1967e-04,  1.7290e-02,
         1.5944e-01, -5.5267e-02, -1.1946e-01,  1.2604e-01, -5.1677e-02,
        -4.3411e-02,  7.1533e-02,  1.9860e-03,  4.2900e-02,  1.1666e-02,
         2.7445e-02,  4.9117e-02,  6.1228e-03,  2.8868e-02,  4.9689e-02,
         5.0018e-02,  2.5946e-02,  7.4836e-02,  4.9945e-02,  2.0163e-03,
         2.1638e-02, -4.3593e-02, -4.2901e-02,  6.1934e-02,  3.2310e-02,
         1.5979e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-8.6102e-01, -6.6843e+01, -2.8512e+00,  3.4360e-01,  3.1947e-01,
         3.8795e-01,  1.6588e-01, -5.7364e-02,  2.0583e-02, -2.4620e-01,
         3.7860e-02, -2.1211e-02,  1.4888e-01, -4.7380e-01, -1.2145e-02,
         1.8300e-02,  6.5972e-02, -1.5785e-01, -5.8062e-02,  6.9597e-02,
        -7.8328e-02,  9.4571e-02,  1.2765e-02, -2.2255e-01,  1.7782e-01,
         1.4736e-01, -7.5646e-03,  5.2458e-01, -1.1117e-01,  1.6427e-01,
         8.3869e-02, -6.4867e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3488e-01,  1.0169e+02,  1.9766e-01,  5.1397e-02,  4.2200e-02,
         1.1252e-01, -1.6768e-01,  6.7106e-01,  1.6110e+00,  3.2728e-01,
        -8.1301e-01,  3.8684e-01,  7.8456e-02, -2.3528e-02,  4.2541e-01,
         1.7961e-01,  4.4555e-01,  3.1635e-02, -1.7625e-01,  3.3202e-01,
        -2.4015e-01, -3.7946e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8139e-01,  3.2337e+01,  2.4185e-01, -1.5975e-01,  6.8282e-02,
         1.4052e-01,  4.8068e-02,  6.2714e-03, -1.2349e-01,  7.0758e-02,
        -5.8155e-02, -1.7077e-01,  2.2228e-02,  1.6199e-03, -3.7421e-01,
        -4.8211e-02, -1.2353e-01,  3.8930e-01, -1.2596e-01, -1.2418e-01,
         6.0589e-02, -1.7958e-02, -1.1419e-01, -8.0879e-02,  6.4869e-03,
         1.3654e-01, -3.7092e-02, -2.6604e-01, -2.7936e-02,  1.6812e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1928e-02,  3.3781e+00, -2.2535e-02, -1.7346e-02, -3.3078e-03,
         7.4716e-03, -1.4200e-03, -5.7150e-03, -2.0711e-02, -4.7344e-03,
        -1.7767e-02, -5.4756e-03, -2.8161e-03, -2.0263e-03, -6.9427e-03,
         4.1671e-03,  3.1037e-03,  2.6082e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8042e-02,  1.0880e+01,  4.0371e-01, -6.0273e-02,  2.4081e-02,
         5.5789e-03, -1.5357e-02, -1.5411e-02, -3.9020e-02,  2.9520e-02,
         2.8405e-02,  1.6660e-02,  4.2584e-02,  3.8489e-02, -4.3178e-02,
        -2.3390e-02,  4.1628e-02,  5.7566e-03, -1.0643e-01,  2.5350e-02,
         2.1194e-02, -3.0505e-02,  2.2747e-02,  2.8884e-02,  2.3166e-02,
         1.3330e-03,  9.3499e-04,  8.9819e-03,  3.4324e-02, -2.8446e-03,
         1.2242e-02,  1.6712e-02,  1.5883e-02, -3.7342e-02, -1.6278e-02,
        -3.5037e-02,  9.5898e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5738e-01,  6.8271e+00,  1.3143e-01,  6.4033e-02,  2.5312e-02,
         6.5159e-04,  1.1883e-02, -2.1652e-02, -1.1090e-02, -2.1541e-02,
        -7.4502e-04,  1.0081e-02, -4.3375e-02, -2.0060e-02,  1.2616e-02,
         7.4231e-03, -3.6251e-03, -4.6805e-02,  1.5180e-02, -1.5801e-04,
         3.0033e-03, -1.5673e-02,  3.3060e-02, -5.4471e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6736e-02,  2.2811e+01,  1.0417e-01, -8.9700e-02,  2.2332e-02,
         4.5432e-02, -6.9385e-02,  1.0600e-01,  8.1091e-02, -3.3317e-02,
         5.2373e-02, -2.6038e-02,  7.5760e-02, -6.3064e-03, -2.5700e-02,
         6.3923e-02,  3.1644e-02, -6.8041e-03, -1.7160e-04,  4.8530e-03,
         2.7785e-03, -7.2699e-02,  9.0959e-03, -4.2950e-03,  6.0645e-02,
         6.7472e-02,  2.6052e-02, -4.8321e-03,  3.6384e-02, -1.2361e-02,
         7.3581e-02,  6.1880e-02,  1.4403e-02,  6.3769e-02,  4.9689e-02,
         1.5150e-01,  7.9569e-02,  2.6821e-02,  2.3021e-02, -7.6730e-03,
         1.6989e-02, -2.5254e-02,  3.6965e-02,  3.2723e-02,  1.6184e-02,
         9.3124e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4338e-01,  4.8682e+01, -1.3971e+00, -1.0220e-01, -8.5730e-01,
         1.1068e-01, -1.2210e-01, -2.4635e-01, -4.1593e-02, -3.7100e-01,
         1.6422e-03, -7.4484e-02, -5.7167e-02, -1.5659e-01, -3.5076e-02,
        -1.4114e-01,  4.5604e-01,  7.7279e-01, -1.0597e-01,  8.2146e-02,
         1.1794e-01,  3.1080e-01,  2.4917e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2529, 11.1139, -0.1946,  0.0980, -0.0648,  0.0174, -0.0209,  0.1026,
         0.0392, -0.0670,  0.0281,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5155e-01,  5.4401e+01, -6.9950e-01,  3.6348e-02,  2.3122e-01,
        -3.4498e-01,  8.0619e-02,  1.8483e-01,  4.6406e-01,  9.0572e-02,
         1.5437e-01, -1.0442e-01, -3.3061e-01, -7.5902e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2245e-02,  4.3349e+00,  2.4213e-02, -5.3853e-02, -8.5493e-03,
        -1.1222e-03, -5.2709e-02,  2.4601e-02, -6.1543e-03,  4.4554e-04,
        -5.9942e-03,  1.9924e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7955e-03,  6.2734e+00,  3.4309e-02,  4.6707e-02, -2.1637e-03,
         4.0955e-04, -3.8704e-02,  6.7362e-03,  3.0211e-03,  2.9001e-02,
         1.0375e-02, -4.9912e-03,  3.7903e-04,  1.0211e-02,  5.4409e-03,
         9.3071e-03,  7.6065e-04,  5.1896e-03,  8.4700e-03, -2.6480e-02,
        -2.3434e-03, -1.0808e-03,  2.8247e-02, -7.2024e-03,  3.0813e-03,
         1.1456e-02,  5.8785e-02,  3.0947e-04,  4.3600e-03,  8.7038e-03,
        -2.0342e-04,  4.5387e-02,  2.1488e-03, -1.0656e-03,  2.2988e-02,
         2.8895e-04, -1.1388e-04, -4.6390e-03,  5.8992e-03, -4.0953e-03,
         1.5306e-03,  6.5722e-03, -3.7254e-04, -7.1902e-04,  1.4520e-02,
         1.0108e-02,  1.3971e-02, -2.7171e-03, -4.1758e-03, -1.0255e-03,
         7.6796e-03, -2.8621e-03, -1.0280e-02,  2.2173e-03,  2.1461e-03],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-2.5725e-01, -7.2374e+01, -5.2468e-01,  3.9846e-01, -3.9758e-01,
         5.6663e-03, -1.0815e-01, -7.0400e-02,  3.6434e-03,  2.0064e-01,
         7.0204e-01,  1.3950e-01,  3.3285e-03,  3.6157e-01,  1.0463e-01,
        -8.6623e-02, -2.2422e-01,  5.4480e-03, -2.1597e-01, -5.1603e-02,
         2.0284e-01, -1.5378e-01,  1.1535e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8664e-01,  4.4149e+01, -2.3293e-01, -1.0624e-02,  1.7948e-01,
        -9.5745e-02, -1.6615e-01,  8.3841e-02,  1.1204e-01, -9.1946e-02,
        -2.4660e-01, -3.1500e-02, -6.2953e-02, -6.4907e-02, -7.0004e-02,
        -2.7774e-02, -2.2521e-01,  2.3292e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8015e-01,  2.7109e+01, -8.6694e-01, -4.6925e-01,  2.5472e-01,
        -1.2929e-01, -3.5629e-01, -9.3201e-02, -1.2881e-01,  3.4090e-02,
        -2.6903e-02, -1.9935e-01, -9.6457e-02, -6.0381e-02,  8.4667e-02,
        -5.9185e-02, -1.8420e-02, -3.0487e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9018e-02,  1.3224e+01, -5.0559e-02,  1.0988e-01,  1.1416e-02,
        -2.7937e-02,  3.4596e-02,  1.2834e-01,  1.2271e-02,  4.3982e-03,
         1.2032e-02,  6.9334e-02,  3.1301e-02,  3.1009e-02, -1.3283e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2100e-02,  1.8752e+00, -1.2163e-03, -8.4792e-03, -9.6027e-03,
        -8.4119e-03,  8.2111e-04,  3.1044e-04, -5.3041e-03, -1.2502e-03,
        -6.9435e-03, -1.3240e-03,  6.0310e-03, -1.2422e-03, -5.0141e-03,
         1.8956e-03,  7.5262e-04,  1.6659e-03,  1.3610e-03,  1.7770e-02,
         7.6891e-03,  6.6163e-03,  4.8281e-03, -1.5803e-03, -1.7331e-02,
         1.4339e-03,  1.0088e-02,  5.4749e-04,  2.9675e-03,  1.2103e-02,
         1.2332e-03,  1.9006e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8298e+00,  9.1285e+01, -2.1607e-01,  2.4596e-01, -4.6295e-01,
        -7.2153e-03, -2.1803e-01, -1.0339e-01, -2.2685e-01, -5.4937e-02,
         3.3820e-01, -3.7946e-01,  2.1695e-01,  3.5708e-01,  4.4940e-02,
         1.0860e-01,  7.3700e-02, -3.3493e-01, -4.2650e-01,  2.7793e-01,
         5.6721e-02,  4.7716e-01, -3.6038e-01, -2.5870e-01,  1.2077e-01,
         5.2324e-02,  3.0406e-01, -4.3557e-01, -4.9507e-02,  1.2774e-01,
         1.4615e-01,  2.0888e-01,  6.8063e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9207e-02,  5.2892e+01,  5.8407e-01,  9.8229e-02, -3.5844e-02,
         9.8678e-02,  6.9242e-03,  7.2843e-03,  1.6780e-01,  2.3753e-01,
         8.6668e-03, -1.9153e-01, -1.1122e-01, -2.9349e-01, -1.1778e-01,
         7.1011e-02,  1.4858e-01,  8.3044e-02, -9.4693e-02, -3.9121e-02,
        -6.3365e-02, -2.1552e-02, -4.1794e-02, -2.9789e-01,  7.6346e-02,
        -3.6129e-03,  9.5888e-02, -1.2446e-02, -2.1370e-03, -2.9104e-03,
         1.1220e-01,  2.0738e-02, -7.6597e-02, -2.3196e-02,  2.9134e-02,
         7.5140e-02, -1.0785e-01,  1.1875e-02,  4.9775e-02,  8.1092e-02,
         2.3598e-02, -7.8271e-02,  6.6207e-02, -4.9378e-02,  6.0543e-02,
        -1.2288e-01, -4.9835e-02, -2.9835e-02, -5.3853e-02,  9.3006e-02,
         7.0221e-02, -1.0983e-01, -7.1258e-02,  6.2594e-03,  1.8107e-02,
         1.5558e-02, -3.3146e-01,  1.3341e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9259e-01,  5.4144e+01,  1.0111e-02, -2.9962e-01, -1.8666e-01,
        -2.4321e-01,  3.3621e-03,  1.6745e-01,  3.1642e-01, -4.8152e-01,
         1.2470e-01,  2.3837e-01,  5.5354e-02,  3.9624e-02,  2.8788e-01,
        -3.0262e-01,  1.7860e-01,  3.6167e-02,  6.6284e-02,  1.5045e-01,
         4.3760e-03, -3.5753e-02,  1.1478e-01, -2.2813e-01,  4.1193e-01,
        -4.7034e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4934e-02,  4.4897e-01, -8.0243e-03,  2.0823e-05, -8.3946e-03,
         2.3081e-03, -9.0798e-04, -6.0708e-04, -4.3723e-04, -1.3962e-03,
         2.7083e-03, -1.6745e-04, -3.3097e-05, -9.7057e-04, -1.6367e-03,
        -7.6458e-04, -6.6496e-04,  1.3555e-03, -1.2959e-04, -1.9815e-03,
         1.0036e-05, -2.7269e-03, -7.1557e-04,  4.8082e-04, -1.7668e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4342e-01,  5.8472e+01,  1.4209e+00,  3.0200e-01,  1.2891e-01,
         1.0936e-01,  1.5840e-02,  3.5563e-03,  2.1173e-01,  5.5021e-02,
         1.6186e-01,  1.7827e-02,  2.4563e-01,  3.6948e-01,  1.1301e-01,
        -3.9533e-01, -9.2280e-02,  2.4011e-02,  1.2589e-02, -5.8568e-02,
        -2.2422e-02,  1.1003e-01,  6.6137e-02, -5.7051e-03,  9.8431e-02,
         3.8586e-01,  1.9680e-01,  8.6104e-02,  3.7019e-02,  4.9805e-02,
         1.0149e-01, -2.4029e-02, -2.1199e-01,  2.5072e-03,  1.7319e-01,
        -6.9394e-02,  5.4832e-03,  2.7533e-02,  1.9451e-01,  1.8184e-01,
        -1.2058e-01, -2.3248e-02,  1.7254e-01,  1.4376e-01,  7.3051e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4648e-03,  5.4357e-01,  3.0336e-04,  4.4494e-03, -2.3305e-03,
        -2.7819e-04, -3.2693e-03, -1.5165e-03, -4.9010e-04, -1.0414e-04,
        -1.4565e-03, -1.0970e-03, -3.4048e-04, -7.4450e-04, -1.1051e-03,
        -1.7581e-03,  4.8033e-03, -2.2138e-03,  5.9427e-04, -1.7368e-04,
        -2.7983e-04,  4.5629e-03,  8.3481e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4652e-02,  1.0533e+01,  1.4835e-01,  2.1567e-01, -9.8789e-03,
        -1.0068e-02,  1.9611e-03,  5.7705e-02, -3.0149e-02, -9.5474e-03,
         8.7979e-02,  1.2863e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 8.1897e-02,  5.9613e+00,  9.9165e-02, -6.1265e-02,  1.7308e-02,
        -1.7186e-02, -1.2379e-02,  9.1337e-03,  3.5762e-02, -1.9340e-02,
        -2.9063e-02, -1.2410e-02, -4.5309e-03,  7.8872e-03, -3.9149e-03,
         9.2767e-03,  1.3208e-02, -2.3448e-03,  2.0268e-02,  9.5620e-04,
         9.3446e-03,  1.2987e-02,  2.1502e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9612e-02,  2.3993e+01, -1.2013e-01, -1.8722e-02, -2.6972e-02,
        -8.1667e-02,  5.9490e-02,  3.8717e-02, -1.2768e-02,  1.0568e-01,
         7.7684e-03, -9.7543e-02, -6.8899e-02,  4.9425e-02, -2.8449e-02,
         1.1750e-02, -3.5246e-02, -4.8697e-02, -4.9241e-03,  4.2085e-02,
         7.0781e-02, -5.0118e-03, -1.2223e-01, -7.4539e-03,  2.4816e-02,
        -6.5046e-02, -6.7477e-02, -4.3322e-03, -9.5716e-03, -2.2529e-02,
         5.1206e-02, -2.1948e-03,  7.3399e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2034e-03,  2.3483e+01,  6.7401e-02, -2.2325e-01,  1.7949e-01,
        -1.5441e-03, -7.6947e-02,  1.4463e-02,  3.6301e-03, -4.6447e-03,
         4.4061e-02,  3.7082e-02, -3.5800e-02, -3.1745e-02,  6.7186e-02,
        -1.1365e-01,  5.0770e-02,  7.2305e-02, -5.4214e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3893e-03,  6.4988e+00,  6.0373e-02, -4.6324e-02, -5.6516e-02,
         5.9072e-02,  4.2572e-02,  8.1312e-03,  3.9958e-02,  2.3154e-02,
        -8.9445e-02,  3.8351e-02,  2.4290e-02, -9.7290e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6880e-02,  8.9400e+01,  1.2095e+00,  2.2813e-01,  8.0147e-02,
         1.2606e-01,  3.0794e-01, -5.9336e-01,  5.3785e-02, -2.0350e-01,
         2.2015e-01, -5.8817e-03,  2.3799e-02,  1.2120e-01, -1.6147e-01,
         8.6016e-02,  2.1244e-01,  2.7200e-01, -2.7074e-01,  2.7608e-01,
         3.6971e-01, -3.1949e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6350e-01,  2.2127e+01,  1.3132e-02, -1.1598e-01, -7.4164e-02,
        -9.1479e-02, -1.0975e-01, -1.9976e-01, -8.1262e-02,  5.3234e-02,
        -1.0053e-01,  6.4829e-03,  4.2967e-02,  2.1887e-02, -6.2007e-02,
        -1.3573e-01, -1.5917e-02,  3.9235e-02, -1.3372e-02,  5.5612e-02,
        -6.3427e-02,  1.5795e-03, -2.2344e-02, -4.9054e-02, -2.3426e-02,
        -2.2109e-02, -1.0681e-02, -2.1839e-02,  6.9240e-02, -2.5049e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6481e-02,  1.8378e+00, -2.5766e-02, -2.1824e-02, -9.9274e-04,
         2.3266e-02,  7.5280e-03, -1.4596e-02,  2.1809e-03, -7.3626e-03,
        -1.5783e-03,  3.3124e-03,  1.3278e-03,  9.9047e-04,  9.2514e-03,
         1.5629e-03, -2.7619e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4897, 21.8270,  0.6166,  0.3754, -0.2269,  0.0627, -0.1382, -0.0283,
        -0.0464,  0.0246, -0.1375,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9390e-01,  1.0175e+02, -4.3829e-01, -7.0096e-01, -2.3754e-01,
        -3.7495e-01, -3.0623e-01, -3.2320e-01, -6.0865e-01, -2.5703e-01,
        -3.3264e-02, -1.5902e-01, -7.8312e-01, -6.1549e-02, -7.5919e-01,
        -5.9620e-01,  7.1570e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0754e-01,  2.8149e+01,  7.1579e-02,  2.9892e-01, -1.7034e-01,
         1.5263e-01, -1.0620e-02, -5.9322e-02, -5.8862e-03, -7.3700e-03,
        -9.7650e-02,  1.9038e-02, -4.6245e-03,  7.5779e-02,  9.6263e-02,
        -5.8481e-02, -2.1564e-02, -5.1892e-02,  4.5882e-03, -8.0211e-03,
        -5.3942e-02,  5.8822e-02,  1.0530e-01, -5.0462e-02, -9.0631e-02,
         1.2820e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8539e-02,  1.4546e+01,  5.1188e-02, -6.2643e-02, -1.3784e-01,
         1.7494e-02, -1.1588e-02,  8.1096e-02,  1.5812e-02, -5.9880e-03,
        -1.0070e-02,  1.3765e-02, -4.9871e-02, -9.0420e-04,  5.3944e-02,
         3.0073e-02, -4.4389e-02, -1.8837e-02, -2.2178e-02, -1.9198e-02,
        -2.6103e-02, -1.1274e-02, -1.5993e-02,  4.8717e-03,  8.8633e-02,
        -2.0816e-02,  8.7058e-03, -5.1797e-02, -1.4498e-01, -3.4529e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1704e-01,  1.2956e+01,  5.3951e-03, -9.5723e-02, -1.1559e-01,
        -2.6174e-02,  5.9968e-02,  4.7843e-02, -1.5889e-02, -2.9312e-02,
        -3.2245e-03, -6.2485e-03, -4.4319e-02,  1.1004e-02, -9.5314e-03,
         1.2462e-02, -2.9994e-02, -1.6183e-02,  1.8987e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.1723e-01,  1.4018e+01,  8.4014e-02,  1.5891e-02,  6.2898e-02,
         1.5224e-03,  1.6320e-01,  5.7980e-02,  6.0785e-04,  6.0298e-02,
         3.4504e-02, -1.5062e-02,  1.8500e-02,  1.8170e-02,  6.5996e-03,
        -1.4183e-02,  2.6135e-02,  9.4215e-02, -1.0157e-03,  1.1570e-02,
         2.2858e-02,  4.6732e-02,  2.4977e-02, -1.1284e-01,  4.1581e-02,
        -5.5324e-02,  9.9067e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1918e-02,  3.5822e+00,  8.1500e-03, -5.1015e-02, -7.7188e-03,
        -4.7411e-03, -1.7851e-03,  1.8581e-03,  1.3450e-02,  3.1148e-02,
         3.1360e-02,  2.6305e-02,  1.2606e-02, -2.0334e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4810e-03,  4.6186e+00,  7.3132e-02, -9.8097e-04,  1.8302e-02,
        -1.0170e-02,  1.4629e-02,  1.2035e-03, -1.8266e-02, -6.8873e-03,
         1.2566e-03,  1.9671e-02, -2.3337e-02, -1.1743e-02, -8.1899e-03,
        -2.2428e-02,  9.1153e-03, -3.0583e-03, -5.1296e-03,  2.1179e-02,
        -1.7739e-02, -6.0961e-03,  8.6724e-03, -3.0976e-03, -7.3354e-03,
         1.4256e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2593e-02,  5.2041e+00, -9.0175e-02, -8.6654e-02, -2.1384e-02,
        -2.9839e-02,  5.5071e-02,  5.0315e-03, -3.3695e-02,  3.4333e-02,
         8.8555e-03, -2.8173e-02,  8.0603e-03, -6.0679e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7496e-01,  1.0610e+01, -1.4413e-03, -1.7358e-02, -9.7083e-03,
        -1.9725e-02,  4.1118e-02,  1.6346e-03,  9.8042e-03,  1.5952e-03,
        -4.5950e-03, -3.8221e-02,  2.8821e-02, -3.6497e-03,  1.5819e-04,
         9.8219e-03,  1.0506e-02, -2.4710e-02, -6.5569e-04,  6.5230e-03,
        -4.0705e-04,  9.6448e-03,  9.0825e-03, -3.0974e-02, -2.3105e-02,
         4.0533e-03, -2.8796e-02,  1.8692e-02, -1.4240e-02, -3.6223e-03,
        -2.3597e-02,  3.1111e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2037e-02,  9.9949e+00,  5.7883e-02, -7.3882e-02,  3.2888e-02,
        -3.6473e-02,  3.1787e-02, -5.8175e-02,  1.2113e-02,  2.6697e-02,
         5.1304e-02,  3.0640e-02,  2.8612e-02,  3.7856e-03,  7.3180e-03,
        -8.4595e-03,  8.2964e-03,  1.7187e-02, -1.0688e-02,  3.1752e-02,
        -2.5416e-02,  2.3509e-02,  4.6293e-03, -8.0234e-02,  1.0388e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1575,  7.8261,  0.1191, -0.0817,  0.0190,  0.1178,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0152e-02,  4.0604e+00,  6.2735e-02, -1.1918e-03,  2.5136e-02,
         7.7931e-03,  2.0984e-03, -7.1887e-03,  5.6804e-03,  2.3700e-02,
         3.0451e-03,  2.5480e-02, -7.6943e-04,  3.0246e-02,  1.8506e-03,
        -2.9082e-03,  4.9662e-03,  9.9839e-03,  8.0788e-03,  1.7703e-02,
        -1.4430e-03, -2.7337e-03,  5.3414e-03,  7.3412e-03,  5.5193e-03,
         1.9149e-02, -1.2523e-03, -3.1715e-03, -3.0675e-03,  2.9142e-02,
         6.1880e-03, -6.8467e-03, -1.1073e-02,  1.8754e-03,  2.6643e-03,
         1.4731e-03,  6.3088e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4916e-01, -3.2937e+01,  2.5695e-01,  1.9451e-03,  1.0741e-01,
        -9.5630e-02,  4.4047e-02,  2.7414e-01, -1.5833e-02, -1.0182e-01,
        -1.6561e-02, -2.5883e-02, -4.7241e-02,  6.8302e-02, -1.1148e-01,
         5.4840e-02,  2.3562e-02,  3.9652e-02, -6.7555e-03, -1.1751e-01,
         1.0685e-01,  2.0442e-03, -7.1826e-02, -1.0442e-02, -1.0683e-02,
         2.8625e-02,  7.5171e-02, -1.3296e-01,  9.0678e-03, -5.7492e-02,
         1.8134e-02,  4.6760e-02, -1.6803e-02,  3.4326e-03,  4.1880e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2135e+00, -5.6918e+01,  4.4888e-01,  4.7470e-01,  5.6453e-02,
         5.0507e-01,  6.9170e-02,  3.4213e-02,  1.5853e-01, -4.3977e-01,
        -2.6622e-01,  4.7453e-02, -2.8281e-01, -4.6939e-01,  2.1249e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1540e+00, -1.0922e+02,  7.5174e-01, -2.5933e-01,  1.1950e-01,
        -4.6806e-03,  8.3860e-02, -9.1430e-01, -1.6633e-02, -6.4823e-01,
        -1.9752e-02,  2.5530e-02,  4.3237e-02,  5.4968e-02, -2.0012e-01,
        -2.6898e-01, -1.0534e-01, -1.6990e-01, -5.5183e-01, -1.6501e-01,
         2.0104e-01, -3.6052e-01,  2.0331e-02,  3.3267e-02, -4.5748e-01,
         1.8939e-02, -1.0181e-01,  6.6755e-02, -6.7824e-02, -2.3746e-02,
        -8.7611e-02,  3.2878e-01, -1.7193e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7811e-02, -2.5448e+00,  5.3238e-03, -8.7701e-05,  6.6617e-03,
         1.5530e-03,  1.9334e-03, -3.2596e-03,  9.9044e-03,  4.7358e-03,
        -1.9587e-03,  1.0983e-02,  3.7322e-04,  8.4760e-03, -9.2852e-03,
        -1.0885e-02, -1.6152e-03,  4.5759e-04, -1.7482e-03,  6.4814e-03,
         1.7455e-02,  5.5745e-03, -6.4741e-03,  5.5165e-03, -4.5962e-03,
        -5.5148e-03, -1.5552e-02, -2.0743e-03,  2.9069e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.4726e-03,  2.6711e-01,  2.3132e-03, -7.6927e-04, -1.3110e-03,
        -3.3810e-04,  3.8228e-04,  2.1597e-03, -1.0104e-03, -4.3598e-03,
         3.8111e-04,  1.3680e-06,  7.1769e-04,  5.5027e-04,  2.7072e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4324e-02,  2.7525e+01, -1.5914e-01, -3.7264e-02,  1.5310e-02,
         4.2670e-02,  9.0453e-02,  1.2171e-01,  7.7647e-02,  8.0837e-02,
         2.3589e-02,  1.0820e-02, -2.0244e-02, -1.8276e-04,  2.7677e-02,
         3.4940e-02,  7.7237e-03,  1.0872e-02,  9.9685e-03,  2.5709e-03,
        -3.6995e-02, -4.7421e-02, -1.3210e-02,  1.5330e-02,  2.8294e-02,
         3.8952e-02, -4.0570e-02,  7.5503e-02, -7.8091e-02, -1.3407e-02,
        -1.8433e-02, -6.0896e-02, -4.4325e-02,  8.8896e-03, -2.8533e-02,
         3.3129e-02,  3.4521e-02,  2.4718e-02,  3.3785e-02, -6.1421e-02,
        -3.2662e-02,  9.1724e-02,  2.8637e-02, -4.1598e-03, -7.8465e-03,
        -4.8916e-02, -4.0826e-03,  4.5051e-02, -7.6598e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9677e+00, -8.3702e+01,  2.2201e-01,  4.2099e-01, -3.9713e-01,
        -5.9499e-01, -3.4620e-01,  6.8017e-01,  1.2517e-01, -4.0698e-02,
         2.9522e-02,  1.1398e-01,  6.8348e-01,  3.2074e-02,  1.1616e-01,
        -2.5994e-01, -5.4479e-01,  2.1306e-01, -1.2660e-02, -4.5866e-01,
         1.4858e-01,  3.5322e-01, -1.3312e-02,  4.8464e-02,  1.4377e-01,
        -9.9418e-02, -6.6531e-02,  2.5787e-01,  1.3524e-02,  1.6731e-01,
        -1.8910e-01,  1.5416e-03,  3.0910e-02, -1.5866e-01,  2.3766e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3429e-02,  9.0372e+00, -2.8686e-02,  2.2388e-02,  5.8841e-02,
        -2.9237e-03,  2.7966e-02,  5.3737e-02, -2.1014e-02, -1.1636e-02,
        -3.0252e-02,  4.6340e-03, -2.2975e-02, -2.1909e-02,  6.4343e-02,
        -8.3054e-02,  4.2078e-03, -2.0374e-02,  1.4753e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6475e-02,  4.7508e+00, -3.1793e-02, -1.9014e-02, -2.9528e-03,
        -6.0318e-03, -3.5867e-02, -2.5407e-02,  2.9275e-02, -1.5161e-02,
        -4.9149e-02, -7.0029e-03, -2.0916e-02, -7.1619e-02, -2.0558e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2930e-02,  7.3148e+00, -2.2566e-02,  2.2029e-02, -7.0839e-03,
        -2.8975e-02,  7.4469e-03, -5.8350e-03, -1.7715e-02,  9.8296e-03,
         3.3206e-02,  1.1552e-02, -3.7521e-04,  2.3976e-04,  2.0697e-03,
        -8.2490e-03,  1.3070e-02,  7.7317e-03, -7.2112e-03,  7.1331e-02,
        -2.3394e-02,  1.2437e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8801e-02,  3.6091e+00, -7.0821e-03,  2.8476e-03, -7.6985e-03,
        -7.5847e-03, -8.8801e-03, -7.2501e-03, -9.6966e-03, -2.0154e-02,
        -1.2035e-02,  3.8470e-03, -2.3601e-02, -4.2735e-03,  2.7308e-03,
         1.9719e-03, -2.8827e-03, -6.5747e-05, -1.2570e-02, -1.5938e-03,
        -6.7568e-04, -8.4238e-03, -1.1697e-02,  6.8574e-03,  4.6035e-03,
        -1.4690e-03, -5.9225e-03, -8.3485e-03,  8.5492e-03,  3.2500e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1040e-02,  2.2227e+00, -1.5050e-02, -5.3735e-02,  1.2930e-03,
        -1.4870e-02,  8.8192e-04, -7.3884e-03,  1.1115e-02,  9.4076e-03,
         1.0727e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1635e-03,  7.3221e-01,  2.6916e-03,  6.1256e-04, -4.5408e-03,
         1.1432e-03,  1.0862e-03, -3.3589e-03, -1.2196e-03, -2.2127e-03,
         2.1916e-03,  4.4301e-03, -3.9212e-03, -6.6948e-04,  3.6194e-04,
        -1.0292e-03, -1.3182e-03, -1.0373e-03, -2.2811e-03, -1.9354e-03,
         3.6898e-03,  8.2660e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0316e+00,  4.4524e+01,  6.6691e-01, -7.6343e-02,  1.8857e-01,
         5.7183e-01,  3.4158e-02, -3.6255e-02, -8.3899e-02, -9.2221e-02,
         2.5934e-01,  2.0591e-01,  4.7165e-02, -1.2806e-01,  2.0784e-01,
         1.6642e-02,  5.6004e-02,  9.6872e-02, -8.5516e-02,  1.1838e-01,
         2.4597e-02, -3.8537e-02, -1.5178e-01, -4.7581e-03,  2.0301e-01,
         9.1221e-02,  1.6255e-01,  3.8581e-03, -2.0677e-01, -7.0651e-02,
         1.5150e-01,  3.4725e-02,  2.1248e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2516e-01,  7.5358e+01,  8.3710e-01,  6.1173e-01, -1.4725e-02,
        -2.4227e-01,  3.6887e-01, -7.7607e-01,  1.0148e-01, -1.1309e-01,
        -1.3469e-01,  1.4506e-01,  2.3051e-01,  1.3800e-01, -1.4636e-01,
         5.9221e-02,  1.6415e-01, -6.8974e-02,  1.5568e-01,  1.3304e-01,
         3.5193e-01, -1.1357e-01, -6.7180e-02, -1.2737e-01, -3.9250e-01,
         6.3660e-01,  5.5041e-01, -1.7569e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3558e-01,  1.1746e+01,  2.8979e-02,  4.9430e-02,  2.5643e-03,
         5.2358e-03,  2.5971e-02,  9.2353e-02,  2.5347e-02,  1.0880e-01,
        -1.2247e-02, -2.6481e-02, -1.1051e-02, -9.4127e-03, -2.1958e-02,
         6.4838e-03,  3.2743e-02, -7.2249e-02, -1.1735e-02, -4.9370e-02,
        -1.0930e-02,  2.9464e-02, -1.2529e-02, -3.5961e-02,  5.5603e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.3647e-02,  1.6327e+01,  2.1746e-01,  2.4256e-01, -4.7244e-02,
         9.6773e-04,  4.1223e-02,  2.6519e-02,  1.8125e-02,  5.4176e-02,
         7.8822e-02,  1.1353e-01, -2.5942e-02,  6.4655e-02, -3.1861e-03,
        -6.6653e-02,  4.9266e-04,  1.8657e-02,  1.7856e-02, -4.4177e-02,
         5.5949e-02, -5.8423e-03,  4.9523e-02,  9.2123e-03,  4.5706e-02,
         1.8129e-04,  1.6409e-02,  1.8788e-03, -2.8795e-02, -4.6114e-02,
        -2.1095e-01, -1.0078e-02,  3.4124e-02,  3.8634e-03,  3.5539e-02,
        -3.3982e-02, -3.3896e-03,  4.7301e-02,  4.8013e-02, -2.6507e-02,
        -3.9590e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9911e-01,  5.6349e+01,  8.3398e-01,  1.5943e-01,  1.9459e-01,
        -1.3442e-01, -2.5166e-01, -4.3335e-02,  2.2880e-02, -6.2110e-02,
        -1.8064e-02, -5.3473e-01, -1.9793e-02,  6.4485e-02,  3.5502e-01,
        -1.8643e-01,  6.2515e-02, -1.1901e-01,  1.1549e-01, -1.0630e-01,
        -1.9810e-01, -2.4594e-01, -1.1215e-01, -2.1450e-01, -3.1354e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0254e-02,  3.8201e+00, -2.9968e-02,  5.5586e-03,  3.2237e-02,
         1.7485e-02,  2.4919e-02,  1.4303e-02, -9.1992e-03, -2.1737e-03,
         7.6928e-03,  1.9819e-02,  1.0911e-02, -2.3908e-03,  5.9396e-03,
        -6.5283e-04,  1.1131e-02,  1.3286e-02,  9.3093e-03,  6.7806e-03,
        -1.6683e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6284e-02,  2.5764e+00,  1.5587e-03, -1.1067e-02, -1.6393e-02,
         3.3460e-03,  2.3526e-02, -1.0781e-03,  4.2494e-03, -2.1208e-04,
         1.0326e-02, -2.4153e-02,  6.2087e-03, -4.6042e-03, -5.1934e-03,
        -9.4721e-04, -6.2655e-03, -1.5138e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0064e-01,  1.2228e+01,  1.0715e-01, -1.4401e-01, -2.4526e-02,
         1.7668e-02,  2.1099e-02, -2.9894e-02, -3.5972e-02,  2.8686e-02,
         7.1582e-03,  5.1040e-02, -2.1215e-03,  1.1127e-02,  2.7439e-02,
         6.9640e-02,  3.8463e-02,  8.5789e-03, -8.7027e-03, -3.1930e-02,
        -2.0367e-02, -1.4438e-02, -7.9744e-03, -1.1786e-02,  3.5205e-02,
        -7.8789e-03, -6.0001e-03,  2.3632e-02, -1.9975e-02, -4.4823e-03,
         1.5773e-02,  2.1789e-02, -2.2013e-02,  6.9818e-03,  6.2068e-02,
         6.2380e-02, -2.4344e-02, -1.6330e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3936e-03,  3.0760e+01,  2.6429e-02,  4.9139e-03,  6.7452e-02,
         1.1677e-01,  2.2235e-02, -7.5083e-02,  9.9499e-02,  2.4828e-02,
        -6.1497e-03, -1.2453e-01, -2.4976e-02, -1.0593e-01,  2.0610e-02,
         1.3728e-02,  5.0202e-02, -5.9678e-02,  6.0311e-02,  1.8375e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5655, 33.3067, -0.4343,  0.5723, -0.0778,  0.1320,  0.0690,  0.1821,
         0.0400,  0.1688,  0.1749,  0.0684,  0.0490,  0.2439,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2554e-02,  2.8685e+00,  1.0379e-02,  2.1030e-03,  5.1873e-03,
         8.2108e-04, -9.4260e-03,  1.0520e-03,  4.4247e-03,  5.5529e-03,
        -9.7098e-03,  1.5804e-02,  4.3550e-03,  1.3987e-02,  9.4327e-03,
        -1.7344e-03,  5.9376e-03,  2.8741e-03,  2.7353e-03,  9.9669e-03,
         2.1090e-03,  9.6186e-03, -2.0403e-03, -4.6972e-03,  5.9118e-03,
        -2.4420e-03, -1.4785e-03, -7.1543e-03,  1.0205e-02, -2.1004e-03,
        -8.2028e-04, -7.8222e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4231e+00, -3.9724e+01,  3.6339e-01,  1.3359e-01,  2.2227e-01,
         8.8484e-02,  5.0426e-02,  5.2469e-01, -9.6341e-02,  7.0145e-02,
         1.8286e-02,  1.2360e-01,  7.8105e-02,  3.4095e-02, -6.4403e-02,
         4.9825e-02,  9.7106e-02,  4.8848e-01,  4.1478e-02,  5.7795e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5246e-02,  5.8247e+00, -4.0591e-02, -3.2798e-02, -4.6948e-04,
        -1.2842e-02,  2.3112e-03, -1.9137e-02, -7.4086e-03, -2.6885e-02,
        -7.8315e-03, -6.8266e-03,  2.0415e-03, -5.4976e-03, -2.0662e-02,
        -2.1390e-02, -2.6913e-02, -3.4254e-03,  1.2731e-03, -1.6514e-02,
        -1.2611e-03,  1.5927e-02, -1.2117e-02,  8.3424e-03,  4.6894e-03,
        -1.5904e-02, -5.0383e-02, -1.3442e-02, -3.2354e-02, -1.2717e-02,
         1.5410e-02, -4.5905e-03, -1.6008e-02,  1.7016e-02,  1.3831e-03,
         1.5485e-02, -1.0666e-02, -1.1010e-02,  1.4740e-03, -9.3379e-03,
         1.7806e-03,  5.1237e-03,  8.1092e-03, -8.2884e-03, -5.6936e-04,
         1.2726e-02,  1.9703e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8754e-01,  1.1076e+01,  9.9116e-02, -1.4221e-02,  1.1156e-01,
         4.3827e-02, -2.4353e-02, -8.3922e-02, -8.3111e-02, -6.2334e-02,
        -8.6568e-02, -9.1139e-03,  8.5737e-02,  6.5700e-02, -6.1468e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6957e-02,  3.4011e+01, -2.3726e-01,  5.9579e-02,  9.4636e-02,
         7.3242e-02,  1.1261e-01, -2.4218e-01,  7.5712e-02, -3.7561e-02,
        -1.0102e-01,  4.6362e-02,  5.3105e-02,  1.7840e-01, -5.2078e-02,
        -1.1054e-01,  5.8660e-03,  2.8119e-02, -8.2711e-03, -2.3865e-02,
         2.8355e-02, -1.0068e-01,  2.9958e-02,  1.1057e-01,  3.1467e-02,
         1.0423e-01,  8.5754e-02, -2.5002e-01,  1.2621e-01, -6.7717e-04,
         3.7636e-02,  5.1181e-02, -2.6033e-02,  3.0505e-02, -2.1503e-02,
         5.2439e-02, -3.6662e-02, -7.4877e-02, -5.0530e-02, -4.0295e-02,
         4.6760e-02, -2.3990e-02, -4.3105e-02, -4.9197e-02, -2.4385e-02,
        -5.2099e-02,  5.0163e-02,  9.5575e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-5.6929e-02,  7.6602e+00,  7.0021e-02, -3.2955e-02, -4.0685e-03,
        -1.2157e-01, -2.5803e-02, -9.8185e-04, -4.5773e-02,  1.4140e-02,
         2.1553e-02, -8.3188e-02,  1.3733e-02,  1.0526e-02, -1.4546e-02,
         6.0979e-03, -7.7288e-02, -9.0992e-02, -1.9883e-04, -2.7594e-02,
         1.6678e-02, -3.4475e-02,  3.1369e-02,  6.3286e-03, -2.4675e-03,
         1.1293e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0368e-01,  1.0554e+02,  2.6338e-01, -8.3025e-01, -2.0949e-01,
        -1.3259e-01, -1.5310e-01, -4.1824e-03, -8.1999e-02,  2.2421e-02,
         6.1924e-01,  4.4150e-02, -4.1003e-02,  8.8147e-03, -4.2615e-01,
         5.7654e-01,  5.2732e-01, -6.8090e-02, -4.1928e-01, -3.1839e-02,
        -5.4083e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6946e-01,  4.8601e+00, -6.8114e-02, -8.5193e-02, -2.1725e-02,
        -3.9531e-02,  2.8845e-03, -2.6616e-02, -2.2897e-02, -3.7936e-03,
         5.4116e-03,  6.2875e-04, -1.4710e-02, -3.8303e-02,  4.8369e-02,
        -2.4255e-02, -3.0834e-03, -2.2236e-02,  1.4319e-02, -2.6590e-03,
         1.7992e-02, -6.3463e-03, -8.4193e-02,  9.8618e-03,  1.5460e-02,
        -2.1392e-03, -2.6106e-03,  4.2234e-03,  9.1461e-03,  3.9848e-03,
         5.7836e-04, -3.0788e-02,  2.1441e-02, -1.5755e-02,  1.4923e-02,
        -2.2271e-02,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0020e-02,  7.0647e+00,  7.1482e-03,  2.7940e-02,  7.6836e-02,
         5.3438e-02,  2.4686e-03,  6.8811e-03,  1.5068e-02,  1.5805e-02,
        -1.5646e-02,  5.6070e-03, -5.5667e-03, -3.8378e-02, -1.0707e-02,
         2.3992e-02, -2.9374e-03,  2.7863e-02,  3.0145e-02,  2.1083e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4801e-01,  1.4588e+01, -1.5367e-01,  1.0816e-01,  1.7997e-02,
        -1.7194e-02, -1.6432e-02, -1.1640e-02, -5.1061e-03, -2.2286e-02,
        -5.9476e-02, -6.6222e-02, -3.5676e-02, -3.2195e-02, -1.1636e-02,
        -9.0497e-02, -1.5333e-02, -1.7292e-02, -1.0264e-02,  3.0858e-03,
         7.3529e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2569e+00,  8.9460e+01, -1.6611e+00,  3.6579e-01, -1.4840e-01,
        -2.0089e-01,  1.0624e-02,  1.7589e-01, -1.6950e-01,  3.7358e-01,
         1.1327e-01,  3.3664e-01,  8.2873e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9057e-02,  3.4518e+01, -1.0295e-01, -1.9414e-01,  5.0376e-02,
        -1.6447e-01,  5.2302e-02, -3.3261e-02, -1.1481e-01, -1.2001e-02,
         2.1120e-02, -3.2643e-01, -1.3108e-01, -8.4569e-02, -3.8935e-02,
         2.7622e-02, -2.2790e-02,  7.0479e-02, -4.9441e-02, -9.8957e-02,
        -1.1708e-01,  5.1368e-02, -1.1960e-01,  1.6466e-02,  1.0325e-02,
         6.9845e-02, -6.4147e-02,  3.9234e-02,  7.7452e-02, -1.5681e-02,
         1.3497e-02,  5.7806e-02,  7.9909e-02, -1.0602e-01,  4.5892e-02,
         2.1581e-01,  2.4288e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4504e-02,  1.5355e+01, -2.2879e-01, -6.9793e-02,  3.9058e-02,
        -5.5577e-02,  3.4667e-02,  9.7333e-02, -1.5294e-02,  2.2201e-02,
        -2.5733e-02,  1.0257e-01,  1.1168e-02, -4.8117e-03, -7.6929e-02,
        -6.0230e-02,  4.4775e-02,  2.1255e-02, -1.8900e-02,  2.3723e-01,
        -8.1130e-03, -2.0410e-02,  2.5023e-01, -3.6777e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8639e-02,  6.9202e+01,  1.3774e+00,  2.3199e-01,  3.6163e-01,
         9.3344e-02,  4.7611e-02, -2.0468e-01, -2.1254e-01, -1.4269e-01,
        -6.8147e-02, -1.4623e-01, -7.8002e-01,  4.8795e-02,  2.0308e-01,
        -8.3867e-02, -1.2937e-02,  4.5677e-02,  4.5194e-02, -1.2809e-02,
         6.9367e-04,  6.9586e-03, -2.5768e-02, -2.8084e-02,  1.1302e-02,
         1.3547e-02,  6.7281e-02, -3.5946e-02, -9.8212e-02, -2.5109e-02,
        -4.1093e-02,  3.6735e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3523e-01, -1.3705e+02, -1.1021e+00,  7.3648e-01, -2.2707e-01,
         2.9669e+00, -2.8349e-02,  4.1396e-01, -4.1455e-01, -5.6689e-01,
        -6.9967e-02,  1.7981e-01,  1.7652e-01,  1.2034e-01, -3.0159e-02,
        -1.2250e-01,  7.4161e-02,  5.3357e-02, -1.7015e-01, -8.4785e-02,
         1.1939e+00,  8.3448e-01,  9.7517e-02,  4.6710e-01, -8.4240e-02,
        -9.9228e-02, -8.0905e-02,  3.0116e-01,  1.7048e-01,  6.6421e-01,
        -1.8280e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1316e+00,  7.8197e+01,  1.9571e-01,  6.1907e-01,  4.3849e-02,
         3.1850e-01,  6.6214e-01, -8.0686e-01,  8.8648e-02, -1.7082e-01,
         6.5348e-02, -1.6762e-01, -1.8924e-01, -1.6276e-01,  1.1003e-01,
         1.8571e-01,  3.9918e-01, -1.8642e-02,  1.2975e-01, -4.3486e-02,
        -2.2316e-01, -3.3852e-01, -1.5095e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1408e-02,  5.1024e+01, -3.2246e-01, -3.2848e-01, -8.9719e-02,
         2.3068e-01,  5.7357e-02, -1.0039e-01,  1.7497e-01,  1.6129e-01,
         7.7102e-02, -2.0629e-01,  8.1372e-02,  8.1435e-02,  2.8350e-02,
         7.5407e-02, -4.6339e-02, -1.7746e-01, -6.3219e-03,  2.8384e-01,
        -8.5454e-02, -6.6889e-02,  3.0410e-01, -5.1826e-02, -4.2725e-02,
         1.1546e-01,  1.7166e-01,  6.1843e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.4149e-02,  8.6805e+00, -1.9018e-02, -1.9948e-02,  1.1995e-02,
        -5.8756e-03,  2.5382e-02, -2.0650e-02, -6.6953e-02, -2.4183e-02,
         6.9341e-03, -8.2459e-03,  1.0910e-02, -1.5609e-02, -1.5442e-02,
         3.3133e-02, -2.8225e-03,  2.4447e-02, -1.3994e-02,  6.2059e-03,
         7.0859e-03, -4.1227e-03, -7.8831e-03,  4.3539e-03, -9.4711e-03,
        -1.0865e-02,  3.0066e-02,  1.6630e-02,  1.3723e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9642e-01,  5.9306e+01, -3.9003e-01,  5.9346e-02,  6.6538e-01,
         6.9560e-01,  2.5640e-01, -1.6802e-01, -1.2072e-01,  1.9083e-01,
         1.0796e-01, -4.1384e-01, -1.0577e-01,  3.0067e-01,  1.8119e-01,
        -2.3160e-02,  2.0244e-01,  1.7002e-01,  1.7294e-01,  1.1775e-01,
        -9.1469e-02,  5.1676e-01,  6.5596e-02,  2.4737e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6233e-01,  3.3240e+01, -2.6512e-02,  1.0831e-01,  1.3012e-01,
         2.0092e-01, -6.3571e-02,  4.0754e-02, -1.0816e-01, -4.3157e-02,
        -1.0545e-01, -1.3586e-01,  1.3154e-01, -9.5973e-03, -5.0394e-02,
         2.0682e-01, -4.8854e-02,  1.5857e-02,  1.0203e-01,  5.3627e-02,
        -5.6475e-02, -3.1910e-02,  1.8561e-01,  4.3689e-02,  4.8110e-03,
         2.3608e-02, -2.9400e-01, -9.1374e-03, -4.6280e-02,  4.4229e-02,
        -2.1530e-02,  1.5434e-01, -7.8972e-02, -4.4912e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2987e-02,  3.4424e+00, -3.3878e-02, -3.1502e-02,  6.6589e-03,
        -1.4691e-02, -5.9159e-04,  1.1452e-02,  5.7915e-03, -7.2611e-03,
        -6.5337e-03, -9.7465e-03,  5.7455e-03, -7.4167e-03,  4.9811e-03,
         1.3955e-03, -1.3828e-02,  4.0418e-03, -1.0696e-02, -1.7611e-02,
         3.3329e-02,  1.2158e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2014e-02,  9.7507e-01, -8.3230e-03,  4.4856e-03,  4.3758e-03,
        -1.6370e-03, -8.8298e-06,  8.9088e-04,  1.8193e-03,  3.5504e-03,
         2.3281e-03, -6.3655e-03,  3.7508e-04, -3.0674e-03, -8.7757e-04,
        -1.0973e-02, -7.9659e-03, -3.9971e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2301e+00,  7.5769e+01, -4.2948e-01,  4.9192e-02, -1.9752e-01,
        -2.7658e-01,  1.4251e-01, -2.8791e-01,  1.9185e-01,  5.8635e-01,
        -7.8120e-02,  1.0075e-01, -2.8393e-01,  3.8741e-01, -1.1270e-01,
         4.6233e-02, -2.3916e-03, -5.0557e-01, -5.7984e-01, -9.8395e-02,
         1.8342e-01, -1.6751e-01, -8.7119e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5794e-02,  1.3637e+02,  3.9931e-01,  2.8589e-01,  3.2817e-02,
         7.3650e-02,  3.4208e-01,  1.9967e-01, -1.4738e+00,  8.3787e-02,
        -1.0767e-01, -9.1980e-02,  1.2957e-01, -3.2055e-01, -1.5271e-01,
        -5.4645e-01,  2.4348e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9123e-01, -2.1003e+01, -5.6681e-02,  1.5146e-01, -9.9290e-03,
        -1.2682e-02, -4.6970e-03, -3.3364e-02, -5.4844e-02, -2.8505e-02,
         2.5952e-02,  9.6375e-02, -2.8204e-02,  2.7194e-02, -8.0429e-02,
        -9.0531e-03,  2.7630e-02,  2.4431e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9715e-03,  8.4701e+00, -1.7883e-01, -2.6765e-02,  3.7315e-02,
        -1.6162e-02, -8.8511e-03,  3.3332e-02, -4.7070e-03,  1.9430e-01,
         3.0343e-02, -3.5582e-02, -2.1373e-02, -5.7632e-02,  9.5274e-04,
         1.1352e-01, -1.3414e-01,  1.0819e-02,  2.5793e-02, -1.5065e-02,
         3.8571e-02, -4.4261e-02, -1.1812e-02, -8.9075e-03,  8.1552e-03,
         3.9805e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7549e-02,  2.4462e+00, -1.5138e-03, -1.7700e-02, -4.1020e-03,
         1.9974e-02,  9.5279e-03,  5.4324e-03,  1.0682e-02, -1.5831e-03,
        -1.6060e-02, -5.8475e-03, -6.2434e-03, -3.1419e-02, -1.5805e-02,
        -1.2861e-04, -6.8177e-03,  7.0832e-03,  7.6492e-03, -5.8179e-03,
        -6.5201e-03, -5.1487e-03, -4.9053e-05,  5.5465e-03, -2.9371e-03,
        -2.1030e-03, -7.3315e-03,  6.3122e-03, -1.1025e-03,  6.0866e-03,
         4.3832e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6893e-03,  2.1915e+00,  4.5511e-02,  9.5383e-03,  7.0062e-04,
        -5.4789e-03,  3.2261e-03,  7.4419e-03, -2.7551e-03,  1.7454e-02,
         1.6685e-03, -6.6563e-03, -1.5796e-02, -6.3104e-03, -7.2614e-03,
         4.2003e-03, -8.8813e-03, -2.4873e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0981,  2.5512, -0.0605, -0.0119, -0.0042, -0.0161, -0.0135,  0.0509,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.2235e-01,  1.1396e+01,  1.2246e-01, -4.9849e-02, -5.9569e-02,
         2.0860e-02, -2.6064e-02, -1.6027e-02, -3.1628e-02,  4.4583e-02,
         2.2321e-02,  4.6201e-02, -6.4699e-02, -4.7066e-02, -1.7631e-02,
         1.8124e-03,  9.3487e-04, -4.2431e-02,  8.2679e-03, -9.5810e-04,
         5.9121e-03,  4.1448e-02, -1.7373e-02,  2.1371e-02,  1.8681e-02,
        -3.9339e-03, -3.0765e-02,  3.6927e-02, -8.8419e-03,  3.8410e-02,
         2.9563e-02,  1.8716e-02,  1.2278e-02,  2.9101e-02,  4.4918e-02,
        -3.3858e-02, -7.0774e-03,  3.7065e-02,  9.7639e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4324e-01,  2.5131e+01,  1.9335e-02, -7.0431e-02, -2.4536e-02,
         5.0156e-02,  1.5581e-02,  3.0003e-02, -1.2206e-01,  1.4805e-01,
        -6.8709e-02, -2.1077e-02,  2.7399e-02, -2.2897e-02, -3.6711e-02,
         5.2531e-02,  3.3584e-02, -4.5041e-02, -5.2615e-03,  4.9242e-02,
         1.0621e-01, -6.3499e-02,  3.6854e-02,  1.0829e-02,  8.9700e-04,
         2.1968e-02,  1.3172e-02, -1.3690e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1735e-02, -1.5312e+02, -7.2733e-01, -1.9843e-01, -4.5274e-01,
        -6.8789e-01, -2.5593e-01, -6.5950e-01, -1.1879e+00, -1.1825e+00,
        -3.7322e-01, -5.9180e-02,  1.3148e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6987e-02,  6.9520e+01, -7.1544e-01, -2.6866e-01,  1.2680e-02,
        -1.3886e-01,  3.4345e-01, -4.4004e-01,  5.0777e-01,  1.2375e-02,
         7.4381e-02, -6.5100e-01, -2.2275e-01, -7.3099e-02,  9.5003e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3368e-01,  3.8598e+01, -1.1232e+00,  3.1849e-02,  1.7269e-01,
        -3.7039e-01,  2.7250e-01,  1.6440e-02,  1.3179e-01, -1.5180e-01,
         6.2308e-02,  1.0550e-01,  7.2591e-02,  7.7492e-02,  8.8650e-02,
         1.0042e-02,  1.4696e-01, -4.5397e-02,  4.2407e-02,  7.1417e-04,
         3.2984e-05,  2.8553e-02,  2.9564e-02, -6.0751e-02, -1.9948e-02,
        -8.5254e-02,  9.8195e-02,  2.9523e-01, -4.6303e-02,  1.3203e-01,
         1.1001e-02,  2.7999e-02, -4.3349e-02,  7.7213e-02,  7.7663e-02,
         2.3620e-02,  1.0700e-01,  6.1218e-02, -9.1357e-02,  5.8416e-02,
         9.2634e-02, -2.2785e-02,  2.0311e-02,  1.2942e-02, -1.0014e-01,
        -3.5250e-02,  7.7944e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9792e-02,  1.3921e+01, -2.8852e-02,  1.2509e-02, -6.2446e-02,
        -1.7323e-02, -3.3088e-03,  3.0300e-03, -1.8206e-01,  1.0530e-02,
        -1.0591e-02, -5.8699e-02, -9.0489e-03, -1.6599e-02, -3.7074e-03,
        -7.7410e-03,  4.1182e-02,  4.1638e-02, -4.4540e-02,  7.8454e-03,
         4.5439e-04, -3.5489e-02,  1.3089e-02, -1.1652e-02, -8.9181e-03,
        -7.0504e-03, -8.3298e-02, -8.7677e-03, -1.7148e-02, -3.3030e-02,
        -1.0214e-02, -8.3140e-03,  4.2780e-02, -1.8948e-03,  2.8197e-02,
        -2.1635e-03, -1.2831e-02, -2.5104e-03,  1.2457e-02,  1.2686e-03,
        -3.2855e-03,  1.7576e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2148e+00,  8.0136e+01,  1.5931e-01, -6.1966e-01, -2.1236e-01,
         2.1829e-01,  8.3369e-01, -1.8449e-01, -4.4555e-01,  8.1803e-02,
        -3.3065e-01,  8.7436e-02, -2.5056e-01, -2.0397e-01,  6.0090e-01,
         7.5177e-03,  2.2429e-01,  3.2721e-02, -7.1295e-02,  1.4879e-01,
        -1.6342e-01,  4.0851e-02, -2.2958e-01,  4.0554e-02, -4.9037e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1001e-01,  2.4934e+01,  2.3264e-01, -1.2850e-02,  3.5788e-02,
        -4.1368e-01,  3.3612e-02, -2.0379e-02,  8.5621e-02,  4.6387e-03,
         1.2171e-02, -1.0237e-01, -7.4882e-04, -1.7457e-01,  1.3685e-01,
        -9.3337e-03,  3.3881e-02,  2.6320e-01, -3.0494e-02,  6.0092e-02,
        -2.7139e-02, -3.6890e-02, -2.3806e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2850e-02,  3.5604e+01, -2.5262e-01,  5.4979e-01, -1.2116e-01,
        -8.6084e-02,  1.7378e-01,  1.5589e-02,  1.2515e-02,  6.2059e-01,
        -1.0636e-01,  7.0590e-02,  3.0273e-01,  3.6670e-02,  2.5589e-01,
        -4.4464e-02,  1.6667e-01,  8.7374e-02,  1.8446e-01, -1.0399e-01,
         1.1168e-01, -2.2652e-01, -1.9321e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7889e-02,  6.8164e-01,  9.6895e-03, -6.4333e-03, -3.4072e-03,
        -1.4486e-03,  7.0887e-04,  6.1615e-03,  2.9998e-03,  6.1745e-04,
        -1.3473e-02,  2.9459e-03, -5.6556e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2412e-01,  6.2690e+01,  1.6113e-01, -1.9085e-01,  1.6805e-02,
        -4.7310e-02,  1.1332e-01, -5.8432e-02,  1.0147e-02,  2.3093e-01,
        -3.9608e-01,  6.9117e-02, -3.2557e-02, -7.6453e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1188e-01,  1.1929e+02, -1.0777e+00, -9.8771e-01,  2.8752e-01,
        -3.3788e-01, -1.4538e-01, -1.9363e-01,  2.1039e-01,  4.1919e-01,
         3.6516e-01,  1.5033e-01, -5.9858e-02,  1.1270e-01, -5.3730e-02,
        -3.8056e-01, -6.1136e-01, -1.2432e-02,  3.8765e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.1154e-01, -1.1580e+02,  1.1048e-01,  3.5897e-01,  6.5806e-02,
        -1.8139e-01,  4.9902e-02,  1.2207e-01, -4.2367e-02, -5.6842e-02,
        -7.7797e-02, -5.2548e-01,  9.9371e-02, -4.6378e-01, -3.7234e-01,
         7.9229e-01, -3.6524e-01,  1.0022e-01, -2.2255e-01, -7.5571e-01,
        -3.4808e-01, -5.0161e-02,  1.3929e-01, -4.6400e-01, -5.8008e-02,
        -2.5935e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0201,  1.1849,  0.0086,  0.0130, -0.0125,  0.0016, -0.0034, -0.0045,
         0.0063, -0.0024,  0.0047, -0.0050,  0.0029,  0.0064,  0.0078,  0.0042,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8139, 64.3874,  1.1801,  3.0290,  0.4127,  0.1015,  0.0789,  0.5910,
        -0.0644,  0.0879,  0.0970, -0.3481,  0.1670,  0.2206,  0.4466,  0.1973,
         0.4667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8215e+00, -9.9517e+01, -2.1815e+00, -1.4018e-01,  6.2879e-03,
        -1.6375e-01, -5.0775e-02, -1.9533e+00, -8.9632e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7809e-02,  4.7579e+00,  1.6794e-02, -1.0419e-02,  9.3249e-03,
         7.5421e-03,  2.3024e-02,  1.4049e-02, -2.2122e-02,  8.6403e-03,
         3.3883e-02,  4.7771e-02,  1.0796e-02,  3.1173e-02,  3.6285e-03,
         2.0879e-02,  2.9910e-02,  2.3745e-02,  6.4809e-03,  1.9383e-02,
         1.0105e-02, -1.4644e-02,  4.9570e-03, -1.1369e-02, -1.5525e-02,
        -8.1283e-03, -1.1553e-02, -1.6653e-02,  1.1066e-02,  1.9461e-03,
         9.1995e-03,  3.8368e-04,  1.8013e-02,  1.0470e-03, -2.3711e-03,
         4.9839e-03, -2.1620e-02, -1.2519e-03, -1.6155e-02, -2.3406e-03,
        -6.5971e-03, -8.0854e-03, -1.0511e-02, -1.0474e-02,  1.5093e-03,
        -1.0469e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3197e-01,  6.4011e+00, -2.0335e-01, -6.5172e-02, -2.0758e-02,
        -2.3807e-02,  5.0168e-02, -1.4691e-03, -1.4280e-02,  1.6693e-02,
        -1.4546e-02, -8.2034e-03, -6.1020e-03, -1.1193e-02,  1.8145e-03,
         1.4288e-02,  1.1605e-02,  1.8094e-02, -1.6671e-02,  3.0414e-03,
        -1.9978e-02, -8.9912e-03, -9.6174e-03, -1.1649e-04,  9.9125e-03,
        -2.9319e-02,  1.5230e-02, -1.5397e-02,  8.4952e-03, -1.7507e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1003e+00,  1.3133e+02, -2.3123e+00,  5.7700e-01,  1.4707e-02,
         1.0928e+00, -6.4554e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([   1.1699, -147.5174,   -1.3083,   -0.5992,   -0.2609,    0.4060,
          -1.2740,   -0.8739,   -0.4362,    0.3784,    2.0599,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8101e-02,  7.1767e+00,  1.3023e-01, -1.3998e-02,  7.7678e-03,
         5.6977e-02, -2.0421e-02,  3.2620e-03,  7.6733e-02,  2.1671e-03,
        -1.1349e-02,  3.8401e-02, -1.0645e-02, -2.9551e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0715e-02,  1.9277e+01,  5.3557e-02, -1.9383e-02, -1.6693e-02,
         2.6207e-03, -9.9073e-03, -4.8199e-02,  6.3406e-03, -1.3264e-02,
         8.4298e-03, -3.4104e-02, -6.1908e-02, -1.5598e-02,  2.3657e-02,
         3.9978e-02,  6.1049e-03, -4.7591e-02, -2.0512e-02,  5.4418e-02,
         2.4941e-02,  4.1824e-02, -1.8202e-02, -6.0392e-04, -2.8088e-02,
         9.3696e-03,  2.9631e-02,  1.5045e-02, -6.2164e-02, -1.7552e-03,
        -2.6948e-02, -3.9457e-02,  6.9975e-02,  2.8637e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4746e-03,  2.3309e-01,  7.8726e-04,  2.6615e-03, -9.9111e-05,
        -5.0616e-04, -5.1214e-04, -1.9587e-03, -8.7104e-04,  5.2408e-04,
        -1.1225e-03, -4.7939e-04,  1.7871e-04,  1.2011e-04,  2.7404e-03,
        -9.9980e-04, -4.2959e-03,  2.5427e-04, -7.0163e-04,  8.6216e-05,
        -2.5042e-04,  1.6585e-04, -9.6083e-04,  1.1316e-04, -4.8952e-04,
         9.1567e-06, -1.3111e-03, -4.9126e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7715e-02,  1.2240e+00,  1.8465e-02, -1.4933e-03,  7.9796e-03,
        -9.2506e-03, -2.9790e-04,  1.2403e-03, -2.2694e-03, -7.8144e-03,
        -2.0438e-03, -5.0510e-04, -4.1014e-03,  6.2170e-03, -7.7182e-03,
        -8.3141e-06, -4.5843e-03, -3.8234e-04,  1.4548e-03, -6.7120e-04,
        -4.0811e-03, -1.9655e-03, -8.1972e-03,  5.2680e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 6.8700e-01,  8.5197e+01, -5.3003e-01,  6.4819e-01, -2.9629e-01,
        -8.4373e-01,  3.4069e-01,  5.3901e-02,  4.8484e-01, -7.0179e-02,
        -5.5159e-02,  2.0951e-01,  1.3278e-01,  1.2105e-01,  2.3257e-03,
        -1.7155e-01, -1.2620e-01, -2.1878e-01,  2.4068e-01,  1.1140e-01,
         2.2428e-01, -1.2032e-01,  7.9898e-02, -3.1057e-01, -2.8623e-01,
         3.4551e-01,  6.4036e-01, -4.6604e-02,  2.5849e-02,  3.4361e-01,
         6.3946e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2004e-01,  2.8779e+01,  5.2115e-02, -1.2282e-01,  8.6492e-04,
        -2.6387e-01, -2.7440e-02,  1.2778e-02,  2.3299e-01, -6.9414e-03,
         1.5361e-03, -5.0705e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7160e-03,  9.4766e-01,  1.7310e-03,  7.9198e-04,  9.8560e-04,
        -1.6881e-03, -6.6920e-04, -1.5239e-03,  2.5548e-03,  8.9714e-03,
        -2.9746e-03,  5.7094e-03,  5.0691e-03, -3.9362e-03, -8.6124e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0694e-01,  1.0522e+02, -3.1786e-01,  1.2018e+00, -3.9933e-01,
         1.0812e-01,  4.1033e-01, -5.0905e-02,  2.2462e-02,  2.3130e-01,
         1.2963e-01,  2.1639e-01, -2.7500e-01,  3.1357e-01,  7.7601e-02,
         5.2251e-02,  2.1736e-02,  3.1882e-01,  3.1088e-03, -9.4503e-02,
         3.3991e-02,  9.6738e-02, -4.3286e-02, -3.0647e-02,  1.4572e-03,
         1.5466e-01, -8.7525e-02, -8.7495e-03,  1.4525e-01,  1.0484e-03,
        -9.0334e-02, -4.6415e-02,  1.1138e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0788e-02,  8.0422e+00,  2.4496e-01, -8.7043e-02,  4.0608e-02,
        -2.5844e-03, -1.8901e-02, -8.6248e-02, -1.7538e-02, -3.1428e-02,
        -6.7791e-02, -4.6792e-03, -3.1938e-02,  2.5241e-02,  4.4093e-02,
         9.7241e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4737e-02,  3.3926e+01,  1.9022e-01, -4.6997e-01, -9.9843e-02,
        -1.9277e-01, -6.7685e-01, -2.7103e-01, -1.0424e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0912,  4.0689,  0.0393, -0.1351, -0.1199,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5262e+00,  1.4255e+02,  2.0002e-01, -3.3573e-02,  1.0384e-01,
        -9.1005e-01,  1.1730e-02,  9.5633e-02,  5.0811e-02, -4.7172e-01,
        -8.4909e-02, -4.5055e-01, -4.9927e-01, -4.6262e-01, -1.0540e-02,
        -2.7745e-01, -8.2364e-01, -4.0250e-01, -2.6682e-01, -5.1505e-02,
        -3.3003e-01,  1.7057e-01, -2.8295e-01, -5.2098e-01, -3.3892e-01,
        -7.8674e-01,  9.7061e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3179e+00,  1.2918e+02,  9.2423e-01, -1.2347e-02,  6.8097e-01,
         1.2333e-01,  1.0079e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0151e-01, -1.5560e+02, -8.0701e-01, -4.3765e-01, -2.8722e-01,
        -1.2784e+00,  1.4463e+00, -2.6638e-01, -1.0767e-01, -2.2468e+00,
         3.2226e-01,  8.3985e-01,  4.7145e-01,  3.4164e-01,  8.7895e-01,
         6.4679e-01, -1.0370e-01,  3.4506e-02, -1.2198e-01,  1.1136e-01,
        -8.0307e-01, -5.4439e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4414e-01,  5.6571e+01,  8.6479e-01,  3.7933e-01,  5.1918e-01,
        -8.6027e-02, -1.6401e-01, -4.7314e-02, -1.5279e-01, -5.3781e-01,
        -4.3551e-02, -1.0884e-01, -3.3443e-01, -4.6726e-01, -7.7744e-02,
        -8.3525e-02,  2.3296e-01, -1.0997e-01, -3.9084e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0247e-01,  1.1427e+01,  1.4003e-03,  3.6323e-02,  2.2103e-02,
        -2.2368e-02,  5.1453e-02,  3.5116e-02,  3.7560e-02,  3.5516e-02,
        -5.2088e-03, -5.6211e-05,  3.1283e-03, -4.5696e-03,  1.9564e-02,
        -1.8642e-02, -2.6886e-02, -8.4575e-03,  1.5112e-02,  1.3733e-02,
        -2.0595e-02, -4.4882e-02,  3.6105e-02, -1.9623e-02,  1.2600e-03,
         7.7145e-04, -1.8666e-02, -6.9184e-03,  6.3431e-03,  1.3865e-02,
        -4.1922e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.1089e+00, -9.6662e+01, -1.0374e+00, -4.7994e-01, -2.2480e-01,
         1.4156e-01, -1.7567e-01, -1.2190e-01,  1.8843e-01, -4.2306e-03,
        -1.8777e-01,  1.6467e-01,  8.7668e-02, -1.1858e+00,  3.3231e-01,
        -1.9363e-02, -7.8467e-02,  1.0723e-01, -3.0648e-01,  6.7396e-02,
        -4.6683e-01,  2.3868e-03, -2.7925e-01, -1.2061e-01,  4.1380e-02,
        -1.1461e-01,  1.0670e-01,  1.2575e+00,  2.7039e-01, -3.7384e-01,
        -8.3843e-01, -5.4264e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8635e-01, -8.3285e+01,  8.2400e-01,  9.4302e-02,  4.4613e-02,
        -5.5408e-03, -2.3513e-01, -2.3676e-01, -5.1304e-01, -1.7855e-01,
         3.4746e-01, -1.2151e-01, -2.7404e-01, -2.5601e-01,  1.7206e-01,
         2.5178e-01, -1.6560e-01, -1.3093e-01,  5.1002e-01,  8.7878e-02,
         2.7595e-01, -9.1866e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0610e+00,  1.0593e+02, -6.7352e-01, -1.1172e+00, -2.1495e-01,
         2.5288e-01, -1.7087e-01, -2.7543e-02,  2.0972e-01, -4.4866e-02,
        -1.2035e-01, -1.4478e-01, -5.5778e-01, -8.6329e-02, -1.0324e+00,
         2.0730e-01, -1.9838e-01, -1.0104e-01, -4.9019e-01,  2.2844e-01,
        -1.2781e-01, -1.5923e-01,  2.7970e-01,  7.3074e-02, -2.0027e-01,
         3.4235e-02, -8.4829e-02, -4.6904e-01,  1.5877e-01, -3.0162e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3433e-03,  1.2182e+00, -6.2475e-03, -5.6274e-03, -6.5080e-03,
        -3.4512e-03, -3.3371e-04, -9.3809e-05, -6.2741e-03, -2.4958e-03,
        -5.5770e-03,  1.6610e-03, -5.0985e-03,  1.2002e-03, -2.3264e-03,
        -3.3363e-03, -3.2367e-03, -1.0231e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0315e+00,  1.3940e+02,  3.3926e+00,  3.9390e-01, -2.8579e-01,
        -2.9430e-03,  3.7643e-01, -1.3952e-02, -2.4112e-01,  4.1957e-01,
         2.9069e-01,  7.7750e-02,  5.0467e-01,  2.8563e-01,  1.8466e-01,
         2.0923e-02,  8.6466e-03, -5.1025e-01, -5.8003e-01,  7.9631e-02,
        -1.3403e-01, -2.0814e-01,  2.8306e-01, -3.0679e-01,  2.5789e-01,
        -2.4985e-01, -6.3211e-01,  1.6385e-01, -1.4668e-02,  1.3776e-01,
         1.3588e-01,  1.0990e-01,  6.7052e-02, -9.5650e-01, -1.7161e-01,
        -2.0333e-01,  1.6467e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2061e-02,  4.4761e+00,  1.7039e-01,  1.2289e-02,  3.2118e-02,
         9.3939e-03,  2.2788e-02, -3.5329e-02,  3.1135e-03,  7.7305e-03,
         1.6807e-02, -2.1178e-03,  5.1671e-03,  1.6456e-02,  3.1100e-02,
         6.5264e-03,  9.4039e-03, -2.6343e-03, -7.3452e-04,  9.3895e-03,
        -7.5637e-03,  6.7702e-03,  2.7371e-02, -3.9503e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8699e-02,  3.4411e+00, -2.3218e-02, -2.7038e-02, -9.7428e-03,
         1.8128e-02, -1.5183e-02,  5.6696e-03,  1.3184e-02, -7.1402e-03,
        -3.7484e-03,  7.4556e-03,  3.7106e-04, -1.5720e-02, -3.9284e-03,
         5.6859e-03, -4.8800e-03, -2.6960e-03, -3.1630e-04, -4.5537e-03,
        -1.1679e-03, -1.3269e-02, -1.9674e-03,  6.5868e-03,  5.7253e-03,
         6.2474e-03, -4.7398e-03, -2.3936e-03,  3.0078e-03, -1.3655e-03,
         9.5326e-03,  1.6363e-03,  2.3525e-04,  3.6685e-04, -1.4239e-03,
         4.6566e-03, -5.9633e-03, -6.7245e-04, -3.9312e-03, -1.7948e-03,
         2.2270e-03, -2.6650e-03, -4.2174e-03, -3.1454e-03,  5.6111e-03,
         1.7686e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5542e-03,  1.4969e+00, -5.3930e-03, -1.0237e-02, -1.3239e-02,
        -1.4934e-03,  5.0099e-04, -1.8638e-03,  2.9949e-03, -1.3064e-03,
        -3.3207e-03,  3.4626e-03, -8.4881e-04, -4.2994e-03,  6.1255e-03,
        -3.7119e-03,  3.1696e-03, -1.2473e-02, -2.3509e-03, -7.5688e-04,
         5.0356e-03,  2.5307e-03,  1.6049e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5140e-01,  1.6349e+01, -2.5271e-01,  1.0508e-01,  1.7461e-02,
         1.0636e-02,  1.0722e-01, -1.7124e-02,  1.3019e-01,  8.2785e-02,
         2.0321e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2313e-01,  6.9144e+00,  7.8198e-02,  4.9384e-03,  2.6172e-02,
        -5.5597e-02, -1.9239e-02, -2.0438e-02,  2.6697e-02,  1.9991e-02,
        -3.2692e-03,  4.9453e-05,  2.9589e-02, -3.5585e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1869e+00,  1.0961e+02, -7.7473e-01,  1.5325e-01,  3.9959e-01,
        -9.5342e-02,  1.8160e-01,  1.7865e-01,  8.4555e-02, -1.8731e+00,
        -1.3343e+00, -2.4918e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8153e-01,  3.8568e+01,  6.7383e-01,  3.8192e-01,  8.2144e-03,
         2.5253e-01, -1.9451e-01,  1.1572e-01,  1.5890e-02,  1.3064e-01,
        -1.0226e-02, -2.8487e-02, -5.7701e-03, -5.7085e-02,  4.5992e-03,
        -7.3915e-02,  1.9371e-02, -1.2283e-01, -3.4917e-02,  2.5431e-03,
        -5.1762e-02, -9.4520e-02,  4.0607e-01, -1.4392e-01,  1.6899e-02,
        -6.8767e-03,  6.7280e-01,  1.2300e-02, -5.6697e-02,  3.3965e-03,
        -9.8217e-02,  1.8670e-01, -4.9394e-02,  1.2710e-01, -1.0181e-02,
        -8.6100e-02, -3.1061e-02,  4.2260e-02,  1.0629e-02, -4.2003e-02,
         6.0787e-03,  2.3116e-02, -1.4407e-01,  2.5160e-02,  8.6626e-03,
        -5.7081e-02,  8.3700e-02, -1.1968e-02, -2.8637e-02, -3.1139e-02,
         1.7658e-03,  2.5954e-02, -9.3209e-03,  6.4668e-02, -9.7476e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-8.0931e-02,  3.5311e+01, -3.7227e-01,  7.8502e-02, -7.8472e-02,
         2.0967e-02,  9.8181e-02,  1.5466e-01,  3.0179e-03, -2.4506e-01,
        -2.6291e-01,  1.6918e-02,  2.2741e-01, -4.3554e-03, -6.4562e-02,
         4.7763e-02, -6.4594e-02,  6.8704e-02,  4.0030e-02, -7.7710e-02,
        -2.6942e-02, -2.4203e-01,  8.0077e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3627e-02,  6.4918e+01,  1.7322e-01,  4.1544e-01, -1.8122e-02,
         1.5485e-01, -6.9026e-02, -3.7373e-01, -9.8524e-02, -2.8416e-01,
         2.3852e-01,  4.3890e-02, -1.6189e-01,  2.6510e-01,  1.6141e-01,
         2.4977e-02,  3.9324e-02,  5.4507e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9275e-01,  9.2753e+00,  4.9359e-02,  5.9953e-02,  1.4767e-01,
         5.6050e-03, -3.2767e-02, -4.5659e-02, -2.9352e-03,  2.9793e-02,
         5.8247e-02, -1.2517e-02, -1.6322e-03,  2.0515e-02,  4.0312e-02,
        -4.3632e-03,  2.0600e-01,  8.4199e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3452e-02,  3.6939e+00, -2.5003e-02, -1.5722e-02, -2.8584e-02,
         2.3067e-02,  1.6846e-02,  4.9790e-03,  1.3421e-02,  1.6412e-02,
         1.4534e-03,  8.5010e-03,  5.8925e-03,  6.8768e-03,  2.2739e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6120e-02,  9.6810e-01, -7.3532e-03, -3.2418e-03,  1.2384e-03,
        -2.3923e-03, -3.2955e-03,  1.5272e-03, -2.7818e-03, -1.8261e-05,
        -4.1146e-03, -3.7633e-03,  2.0281e-03,  1.2151e-03, -3.5823e-03,
        -3.4656e-03, -4.6148e-03, -4.9995e-03,  2.6429e-03,  1.1232e-02,
         1.0944e-03,  4.2008e-03, -1.4474e-04, -2.9908e-04, -2.2892e-03,
        -1.2582e-03,  9.9110e-04, -6.7987e-04, -5.9818e-03,  1.2108e-03,
        -4.0403e-03,  5.6851e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2737e-02,  2.5198e+01, -1.8496e-01, -1.3193e-01,  1.1160e-01,
        -7.7496e-02, -7.1107e-02, -7.1255e-02, -2.1145e-01, -6.1233e-02,
        -8.0766e-02, -6.8537e-02,  9.4825e-02, -2.8790e-01, -8.9346e-03,
         2.4780e-02, -7.0307e-03, -1.9467e-01, -8.2370e-02, -1.1144e-02,
        -7.5567e-02, -1.3666e-02, -8.0677e-02, -7.0972e-02,  2.3059e-02,
        -8.5565e-03, -3.5009e-02, -1.2931e-01, -9.0301e-02,  8.1648e-02,
         3.0445e-02, -2.7051e-02, -1.3586e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1349e-01,  5.0027e+01, -4.8364e-01,  3.9196e-02, -1.8786e-01,
        -4.1988e-02,  6.7231e-02, -1.1300e-01,  1.1251e-01,  8.5575e-02,
        -2.7179e-02, -2.1022e-01, -5.1000e-02, -2.6929e-02, -1.1822e-02,
         2.7428e-02,  1.2836e-01,  4.0032e-02, -1.2830e-01, -2.8251e-02,
         1.5881e-02, -8.3265e-02, -5.8204e-03,  5.4904e-02, -8.6798e-02,
         1.4774e-02, -2.9440e-02, -6.4958e-02,  1.0934e-02, -9.3050e-02,
         5.9425e-02, -5.7125e-02,  1.8755e-02, -9.1447e-02,  1.5419e-02,
        -4.9283e-02, -4.3682e-02,  4.3691e-02, -2.1721e-02, -2.2963e-02,
         2.1647e-02, -4.2503e-02,  1.3462e-02,  3.3493e-03, -3.3325e-02,
        -9.6786e-02, -2.3908e-02, -1.8743e-02,  6.5847e-03, -1.8546e-01,
         2.8294e-02, -5.4037e-02,  1.4090e-02, -1.9415e-02, -1.1422e-02,
         3.7366e-02,  6.8825e-02, -2.0351e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5002e-01,  5.8613e+01,  6.4043e-01,  6.1505e-01,  1.2560e-01,
        -1.3252e-01, -7.7776e-04, -3.4864e-02,  1.1058e-01,  2.9616e-02,
         3.0406e-01,  1.8521e-01, -5.0326e-01,  1.6083e-01,  1.5435e-01,
        -1.0199e-01,  2.0829e-01,  6.1760e-02,  5.2423e-02,  1.3611e-01,
         8.2187e-02,  2.2348e-02, -8.4155e-02,  3.7545e-01,  3.9579e-02,
        -3.5354e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1150e-03,  7.7190e-01, -9.1323e-03,  9.2244e-03, -3.4597e-03,
         8.5184e-03,  4.8827e-03,  9.4187e-04,  2.9286e-03,  7.5981e-03,
         4.6909e-03,  1.3467e-03,  1.3976e-03, -5.9517e-04, -1.4360e-04,
         1.5794e-03, -2.1050e-03,  1.0211e-03,  2.0684e-03,  2.6014e-03,
        -1.2222e-04, -3.9979e-03, -1.2530e-03, -1.1526e-03, -9.0577e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5415e-01,  9.0975e+01,  1.3966e-01,  6.1446e-01, -2.3194e-01,
         7.0937e-02,  2.7512e-01, -2.4190e-01,  1.8248e-02, -2.3803e-02,
         4.3166e-01,  1.2471e-01,  1.7158e-01,  7.0659e-01,  4.7566e-02,
         1.2016e-01, -4.2138e-02,  5.0324e-02, -2.9553e-02,  2.3519e-01,
         4.4826e-02,  8.1338e-02,  1.4210e-02,  1.1068e-01,  1.1051e-01,
        -1.7210e-02, -6.4900e-02,  6.9378e-02,  1.1138e-02,  1.3904e-02,
        -3.4585e-01, -2.5683e-02, -5.2950e-01,  3.8630e-02,  2.0987e-01,
        -4.1039e-01,  5.2484e-02,  2.2987e-01, -2.9975e-02,  3.0649e-02,
         4.3411e-02, -2.0777e-01,  3.8486e-01,  1.7357e-01, -1.1050e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2409e-02,  1.4552e+00,  4.1044e-03,  2.1511e-02,  5.4284e-03,
         2.8036e-03, -1.3872e-02, -1.0339e-02, -4.5563e-03,  2.7600e-03,
        -2.5382e-04, -2.8731e-03, -1.5848e-03, -1.1107e-04,  1.9392e-03,
        -2.5732e-03,  8.8191e-03,  2.6111e-03,  1.2093e-03, -7.6248e-04,
         1.7366e-02, -3.6017e-03, -1.0126e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4585e-02,  1.3470e+00,  1.0964e-02,  1.3949e-02, -9.1474e-03,
         1.2743e-02, -4.6903e-03,  4.9431e-03, -1.6594e-03, -6.9308e-03,
        -1.7074e-05,  4.9616e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 8.3529e-03,  1.5011e+00,  3.9772e-03, -1.2890e-02,  2.3086e-03,
         6.9370e-03,  3.7408e-03,  3.5858e-03,  7.8819e-03, -1.8056e-03,
        -6.0590e-03, -3.9361e-03, -2.4169e-03,  5.1768e-03, -3.0587e-03,
         4.8994e-03,  1.3902e-03,  2.4140e-03,  3.5111e-03,  3.2164e-04,
        -1.9305e-03,  4.0483e-03, -3.9509e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4196e-02,  3.1736e+00, -1.6402e-03,  1.2134e-03, -1.3127e-02,
        -5.2864e-03,  1.0063e-02,  8.8911e-05, -1.7493e-03,  2.7856e-03,
        -1.2407e-03, -7.7541e-03,  4.0072e-03, -3.8552e-03,  5.6891e-03,
        -5.5467e-04, -6.9742e-04,  4.0972e-03,  5.6933e-03,  4.6850e-03,
         1.7233e-02, -6.3834e-03, -9.9775e-03, -4.5782e-03,  1.2018e-02,
        -3.3595e-03, -9.6733e-03, -2.1041e-03, -5.8894e-03,  2.0010e-04,
        -1.9507e-03, -8.1878e-03,  2.6739e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3395e-01,  1.8034e+01,  1.2880e-01, -1.0298e-01,  8.6624e-02,
         9.4265e-03, -4.7307e-02,  4.6304e-02,  8.1049e-02, -8.2495e-02,
         1.1873e-01, -4.6667e-02,  4.1994e-02,  5.7730e-02,  9.3797e-02,
        -4.7942e-02, -1.6803e-02, -3.9136e-02, -4.3577e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5119e-02,  3.9363e+00,  4.1225e-02, -2.3872e-02,  3.3683e-02,
         2.1683e-02,  3.5245e-02,  1.3938e-02,  2.0200e-03,  3.6715e-02,
         3.3293e-03,  1.8832e-03, -1.5706e-03,  6.3507e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1720e+00,  7.6984e+01,  5.4749e-01, -3.4620e-01, -5.3491e-02,
         1.1520e-01,  1.7163e-01, -7.9012e-03,  1.2702e-01, -1.9601e-01,
        -1.2812e-01, -1.7603e-01,  5.4585e-02,  2.4336e-01,  1.0487e-01,
        -1.2183e-01,  2.1572e-01, -1.2996e-02, -3.4701e-02,  8.8791e-02,
        -1.1629e-01,  1.7219e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0511e-01,  2.3385e+01,  1.6802e-01, -1.3703e-01,  7.1343e-02,
        -1.4290e-02, -6.9332e-02, -1.7253e-01, -1.9817e-02, -9.5025e-02,
        -7.5900e-02,  1.2019e-01,  7.2977e-02,  4.4249e-03, -2.8297e-02,
         2.7552e-02,  6.0948e-03,  7.0498e-02,  4.1834e-02,  8.6017e-03,
        -3.5906e-02,  3.0217e-02,  2.4648e-02, -3.4952e-02, -6.1927e-02,
         7.4927e-02,  2.3240e-02, -2.7842e-02,  5.6745e-02, -1.3551e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0998e-01,  3.7575e+00,  5.6573e-03, -7.0902e-03, -4.2348e-03,
         3.9791e-02,  2.8608e-02, -1.5700e-03, -7.3391e-03,  7.3402e-03,
        -3.0126e-02,  2.3340e-03, -9.4324e-03,  4.8645e-03,  1.5149e-02,
        -2.7793e-02, -3.1212e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4581e-01,  3.0651e+00, -1.1976e-02,  2.0619e-02, -2.4214e-04,
        -5.5025e-03, -4.4174e-03,  3.1132e-02,  6.0987e-04,  9.6446e-03,
         1.1509e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8680e-01,  3.2589e+01, -8.1675e-02, -1.3129e-01,  1.9096e-01,
        -2.0610e-01, -7.5721e-02, -7.6710e-03, -1.5726e-01,  2.7602e-02,
        -9.6539e-02,  9.4748e-02, -2.0647e-01, -2.7446e-01, -2.3093e-02,
        -2.5320e-01, -1.6379e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0638e-02,  4.8940e+00, -3.6278e-02,  1.5039e-02,  5.8668e-03,
         2.9186e-02,  6.6147e-03,  9.2936e-04, -3.3942e-03,  5.6106e-03,
         8.4100e-03,  8.6622e-03, -4.2655e-03, -8.5683e-03,  4.4831e-02,
        -2.2817e-03,  3.6363e-03, -3.1734e-03, -1.7924e-02,  1.8303e-03,
        -1.6305e-03,  1.2724e-02,  2.9882e-02, -5.7396e-03,  2.7204e-03,
         2.3643e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8489e-01,  2.6501e+01,  8.9026e-01,  8.2816e-02,  3.2157e-02,
        -1.5956e-02, -5.9174e-02,  1.8301e-01, -6.3079e-02, -2.4603e-02,
        -1.3840e-02, -1.2363e-01,  9.6364e-02,  3.4027e-02,  4.2661e-02,
        -1.4181e-02, -5.6909e-02, -1.9811e-02,  4.4245e-02, -1.5849e-02,
        -3.3308e-02, -5.9251e-02, -2.8409e-02,  1.4902e-02, -6.2324e-03,
        -1.4799e-02,  1.1299e-01, -3.3724e-02, -1.5433e-01, -1.4511e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9844e-03,  7.8766e-01,  6.3303e-04,  9.6091e-04, -7.1208e-05,
        -4.7756e-03, -1.4024e-03, -4.9361e-05, -5.5414e-04, -4.9038e-03,
         7.4837e-04,  7.0179e-03, -2.7848e-03, -5.1913e-05,  4.6151e-04,
        -1.2206e-03,  5.0275e-04, -2.8721e-03, -6.3106e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 3.5664e-01,  1.8379e+01, -4.0260e-02, -1.4385e-01, -2.8061e-02,
        -3.7947e-02,  9.5874e-02,  4.9090e-02, -4.7071e-02,  1.1549e-02,
        -1.9052e-02, -5.2558e-03,  2.5654e-04, -8.2905e-03, -1.5716e-01,
        -3.6779e-02,  4.9180e-02,  1.4004e-01,  8.0966e-04, -2.1507e-02,
        -4.2094e-03, -3.1924e-03,  3.5012e-02, -2.7630e-02, -3.7740e-02,
         1.3988e-02,  7.7765e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2967e-02,  3.4977e+00,  1.2911e-02, -1.6512e-02, -1.2885e-02,
         2.5969e-02,  1.4876e-02, -8.2425e-03,  1.2036e-02,  2.0177e-03,
         7.9534e-04,  3.0336e-02,  9.4824e-03,  8.2080e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6203e-03,  2.5347e+00, -4.4300e-04,  1.0384e-02,  1.5173e-03,
        -1.8172e-02,  9.7817e-03,  1.6973e-02,  3.6181e-03,  3.8903e-03,
         8.3146e-03, -8.0098e-03,  6.8730e-03,  3.3549e-03, -3.5963e-03,
         2.3705e-03, -2.2771e-03, -9.5432e-03,  6.5975e-03,  1.5738e-02,
        -3.6822e-03, -1.1430e-02, -1.7447e-03,  4.1254e-03, -3.1753e-03,
         2.3732e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1376e-02,  4.2404e+00,  6.1318e-03,  2.2446e-02,  3.6504e-02,
        -5.6829e-02,  3.8138e-04,  1.5913e-02,  2.7852e-02,  7.7996e-03,
         1.8839e-02, -4.1637e-02, -1.4568e-03, -4.0537e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6955e-02,  5.3791e+00, -1.3474e-03, -1.9642e-03, -5.7666e-03,
         2.0035e-03,  1.9527e-02, -4.2200e-03,  6.9000e-04,  1.8054e-02,
         3.1444e-03,  3.6061e-03,  1.2744e-02, -3.5453e-04,  1.2703e-02,
         1.3762e-02, -2.2569e-02,  5.5620e-03, -1.4047e-03,  1.7260e-02,
         3.3840e-03,  2.0288e-02, -7.3860e-04,  1.2613e-03,  1.0644e-03,
         9.4950e-03,  2.7637e-03,  6.5996e-03, -1.2008e-04, -3.1672e-03,
        -3.8024e-03,  2.2347e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2972e-02,  2.5778e+00,  2.8588e-04,  5.2187e-02, -4.5194e-03,
         1.2195e-02,  3.2686e-03, -2.2898e-02,  6.6276e-03,  1.4751e-02,
         4.8650e-03,  4.6862e-03,  5.8014e-03, -3.9707e-03,  6.4325e-03,
        -7.1953e-03,  3.4739e-03,  6.8859e-03,  7.8940e-03,  1.1927e-02,
         6.9251e-03, -5.6073e-03,  1.7323e-03,  3.4803e-03,  1.8622e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4808e-02,  9.6657e+00,  1.9172e-01, -4.9706e-02,  7.3974e-03,
         2.9139e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1735e-02,  1.0965e+00,  7.2340e-03, -1.6275e-02,  4.3082e-03,
        -4.2240e-03,  2.7612e-03, -5.7098e-03, -1.4881e-03,  3.8290e-03,
        -2.8428e-04,  1.6268e-03,  4.2571e-04,  7.8754e-03, -2.3283e-03,
        -2.1843e-03,  7.9525e-04,  8.1542e-04, -1.6602e-03,  3.0337e-03,
        -3.8970e-03,  7.8762e-04, -9.7978e-04, -5.9506e-04, -1.5847e-03,
         2.3209e-03, -6.8662e-04, -2.6503e-05,  8.4066e-04,  2.3289e-03,
         2.5888e-03, -1.0630e-03,  6.2662e-04, -2.8606e-03,  1.7555e-03,
         1.0295e-03,  1.6022e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1460e-01, -6.4249e+01,  6.4947e-01,  5.6166e-02, -2.6946e-01,
         2.4313e-02,  1.5632e-01, -3.4059e-01,  1.5166e-01,  3.2627e-02,
         1.9378e-01, -1.8063e-01,  2.5096e-01,  2.7206e-01, -1.9360e-02,
         2.0302e-01,  2.0618e-01, -7.9659e-02,  1.5859e-01, -1.0738e-01,
         4.9051e-01, -2.4768e-02,  4.4712e-01, -1.7367e-03, -6.1975e-02,
        -3.0821e-02,  7.3237e-02, -3.2846e-01, -1.4341e-01, -1.6086e-01,
         5.3142e-02,  1.0725e-01,  2.8445e-02,  3.3662e-01,  1.9632e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3322e+00,  9.8935e+01, -4.7355e-01, -5.3573e-01, -2.0578e-01,
        -1.8979e-01,  5.9613e-01, -1.6104e-01, -5.4122e-01,  2.7092e-01,
         2.6233e-01, -8.3125e-01,  8.4570e-01,  1.2605e-01,  1.1054e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9324e-01,  1.2247e+02, -1.6702e+00,  9.0778e-03,  1.9196e-01,
         6.5413e-01,  3.3796e-01,  1.8731e-01,  8.4736e-02,  6.4192e-02,
        -5.2084e-01,  1.9283e-01,  3.4569e-01,  1.9966e-02, -5.4221e-02,
        -2.2377e-01, -1.9030e-01, -1.7596e-01,  3.5963e-01, -4.2463e-02,
         4.5398e-02,  2.9365e-01,  4.9271e-02, -7.6238e-02, -8.3668e-02,
         1.1741e-01,  5.1316e-02,  4.9111e-02,  1.5725e-01,  3.5684e-01,
         1.3378e-01,  1.1806e+00,  8.9562e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8645e+00, -1.0364e+02, -3.5151e-01, -2.1136e-02,  7.9806e-02,
        -4.1367e-02, -2.0523e-01,  6.6599e-02,  1.2160e-01, -2.6431e-01,
         4.7368e-01,  8.1777e-01,  3.2715e-01,  1.7473e+00,  4.5675e-01,
        -3.1310e-02,  1.0976e-01,  7.5474e-02,  2.7651e-02, -6.4425e-02,
        -9.3778e-02,  6.7821e-02,  2.5790e-02, -4.1108e-02, -1.2967e-01,
         3.4765e-01,  2.2357e-01, -3.7720e-01,  9.4736e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 8.8559e-02,  3.2466e+00, -5.5446e-03, -1.8569e-02, -3.9326e-02,
        -2.4402e-02, -3.7908e-03,  3.0668e-02,  8.1226e-03, -5.7737e-02,
        -9.2855e-03, -9.5333e-04,  2.2062e-03,  2.1650e-02,  3.1289e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1264e-01,  2.3658e+01, -6.1459e-01, -2.8679e-01, -9.6955e-02,
        -3.8291e-02, -5.0672e-02,  4.6157e-02,  7.5761e-02, -2.6225e-02,
         7.7558e-03, -3.0799e-02, -3.6824e-02, -1.7815e-02,  7.2342e-02,
         1.1414e-01,  5.1146e-03,  5.6727e-02, -2.0056e-02, -1.0001e-02,
        -2.2955e-02, -5.7783e-02, -3.4434e-02, -1.4363e-02,  2.6081e-03,
         8.2618e-03, -3.0569e-02, -6.3668e-03, -5.7625e-02,  3.8592e-03,
        -1.1767e-02, -2.2981e-02, -1.0355e-02, -1.6273e-02, -2.2265e-02,
         3.8118e-02,  1.9145e-02,  9.7016e-03, -7.0618e-03, -3.2915e-02,
        -3.5913e-02,  3.4235e-03, -6.6312e-02, -2.7856e-02, -9.7122e-03,
        -6.8510e-02,  2.2638e-02, -1.0980e-03, -1.4681e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9389e-01, -9.2408e+01, -2.0168e+00, -5.7832e-01, -4.2641e-01,
        -7.8541e-01, -7.0275e-01, -2.1831e-02,  7.5533e-01, -6.2772e-01,
        -3.2431e-01, -4.1522e-01,  3.3426e-02, -2.5703e-01, -1.3594e-01,
        -6.1244e-02, -9.7417e-01, -4.2398e-01, -1.8049e-01, -9.7147e-02,
        -4.6795e-02,  1.6442e-01, -2.3463e-02, -1.4896e-01, -2.8592e-01,
        -1.1662e-01, -3.7386e-01, -5.3924e-01, -4.3298e-01,  8.3139e-02,
        -1.8499e-01,  1.9586e-02, -3.5902e-02, -1.9366e-01,  8.7335e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3880e-01,  3.1034e+01,  1.5299e-01, -1.5379e-01, -9.6920e-03,
         5.2306e-02, -1.7812e-01,  4.3744e-02,  5.5705e-02, -8.5792e-03,
        -2.0172e-01,  7.7712e-02, -3.3371e-02, -2.5313e-02,  1.4323e-01,
        -5.1392e-02, -4.0544e-02, -1.7105e-01, -1.6202e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1853e-01,  4.3429e+01, -1.4079e-03,  2.6356e-01,  2.0210e-01,
         1.6111e-01, -5.2900e-02, -5.8462e-02,  3.5896e-01, -3.4382e-02,
        -1.3392e-01, -1.7359e-01,  3.1341e-02, -9.9171e-01,  3.4401e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6551e-03,  9.5324e-01, -8.3347e-03, -6.8239e-03, -1.0974e-03,
        -8.4536e-03, -1.9799e-03, -3.1468e-03, -6.0777e-03, -2.0238e-03,
        -9.9721e-04,  4.3502e-04, -2.7185e-03, -2.8372e-03,  1.9411e-03,
        -2.1361e-03, -1.6280e-03, -9.6584e-03, -6.8570e-04,  3.6571e-03,
        -1.3171e-03, -4.5450e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1409e-01,  5.6713e+00, -8.8931e-02,  1.1518e-03,  1.8346e-02,
         1.6856e-02, -1.5771e-02, -1.3773e-02, -5.6961e-02, -8.5052e-04,
        -1.2189e-02,  4.9246e-04,  3.2102e-02, -1.0469e-02, -8.2495e-03,
         7.0309e-03, -3.9222e-03, -1.4674e-02,  1.5473e-02,  8.2919e-05,
        -3.6025e-03,  4.3050e-03, -7.0208e-03,  1.2384e-02,  2.9759e-02,
        -9.0669e-03, -1.5256e-02, -2.6888e-02,  2.1609e-02,  3.4656e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7758e-02,  4.1295e+01, -4.4575e-01, -7.7173e-01, -8.5458e-02,
        -1.9591e-01,  1.4048e-01, -1.7507e-01,  8.1462e-01,  1.9567e-01,
         3.1717e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1708e-01,  8.7939e+00,  4.6365e-02,  6.0646e-02,  8.8673e-02,
         2.4973e-02,  4.0942e-02,  1.2234e-02,  3.7326e-03, -1.4380e-02,
         6.4686e-02,  8.0020e-02,  1.5297e-02,  1.0672e-02,  3.4290e-02,
         5.9044e-02, -3.4518e-02,  9.4277e-03, -1.6490e-02, -2.9991e-03,
         1.1105e-02, -4.8047e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3328e-01,  2.5365e+01, -8.1890e-02, -1.7785e-01,  9.5212e-02,
        -3.2752e-01, -8.0158e-02,  4.6343e-02,  2.0564e-02, -1.5855e-02,
        -2.5497e-02,  9.0742e-02,  8.2940e-04, -1.3129e-03, -1.2710e-02,
        -7.9749e-02, -2.4081e-02, -1.1376e-02, -1.0793e-02, -1.2555e-02,
        -2.6915e-02, -6.6178e-02, -2.4693e-02,  1.0262e-02,  1.9447e-02,
        -1.8110e-02,  5.9920e-03, -3.8504e-02, -2.9201e-01, -3.0221e-02,
         9.1566e-02, -3.7718e-02,  1.4159e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1616e+00,  1.1291e+02,  2.6295e+00,  1.3202e+00,  2.2127e-01,
         5.8977e-01,  2.7043e-01,  3.4948e-02, -6.0771e-01, -3.7378e-01,
        -4.2531e-01,  4.6409e-02, -9.6107e-02,  4.8352e-02, -4.3979e-01,
         1.8870e+00,  4.4458e-01,  6.8224e-03,  1.0099e-02, -1.5093e-01,
         7.4739e-01, -2.7289e-01,  5.5085e-01,  7.9511e-02,  2.8853e-02,
         1.2429e+00, -2.5874e-01, -3.1128e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7970e+00, -1.1745e+02, -1.8265e-01, -3.1910e-01,  5.8104e-02,
        -2.9080e-01, -1.2734e-01, -8.3428e-01,  9.9554e-01, -5.3284e-01,
         2.6633e-01, -1.2997e-01,  9.1964e-02, -1.0221e-01,  5.6089e-02,
         6.9703e-02, -1.2079e-01,  2.3718e-01, -4.5658e-03, -3.4066e-03,
         2.1289e-01, -1.2304e-02,  5.7175e-01, -5.6489e-02,  1.7427e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.5675e-01,  2.8479e+01,  6.2118e-01, -1.5561e-01,  1.9874e-02,
         1.0918e-01, -9.2674e-02, -7.7235e-02, -1.7640e-01, -1.2756e-02,
         7.8021e-02,  9.7746e-02, -2.3398e-02, -4.7543e-02, -8.4374e-02,
        -1.8156e-01, -3.9336e-02,  2.9377e-02,  1.8485e-02, -5.3569e-02,
         4.4729e-02, -1.8086e-02, -8.6900e-02,  1.8130e-04,  2.0267e-02,
        -1.6277e-02,  4.9660e-02, -9.8875e-02,  2.0574e-02, -1.8970e-01,
        -1.2632e-01, -3.1157e-02, -6.9434e-02,  2.4488e-02, -2.9357e-03,
         5.1150e-02, -4.9460e-02, -2.4689e-02, -1.8806e-02, -1.5382e-02,
        -4.9987e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4194e-02,  6.0142e+00,  2.5883e-02, -2.0368e-02,  1.4036e-02,
        -3.2648e-02, -1.3787e-02, -3.2876e-02, -3.6725e-02, -2.0995e-02,
        -4.6450e-04, -4.1985e-02,  6.9173e-03,  1.0178e-02,  1.7387e-03,
         1.4442e-02,  5.9057e-04, -1.8535e-02, -6.4815e-03, -2.5756e-02,
        -2.8580e-02, -6.3354e-03, -8.3438e-04,  3.9866e-03,  1.1223e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7827e-01,  3.9760e+01, -2.8217e-01, -3.0698e-01, -2.6499e-01,
         3.3513e-02,  4.2289e-02,  1.7130e-01, -1.7457e-01,  4.5140e-03,
        -5.7753e-02,  1.3707e-01,  5.3595e-02,  9.9244e-02, -1.2299e-02,
        -3.6660e-02,  2.0972e-01,  7.8205e-02, -7.4980e-03,  6.3153e-02,
         1.0192e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3536e-03,  1.8175e+00, -1.7436e-02, -1.6951e-02,  2.5635e-03,
         8.0733e-03,  1.1708e-02,  8.0039e-03, -4.5512e-03, -8.7624e-04,
         7.1621e-03, -6.5211e-03, -2.6408e-03, -2.5080e-03, -1.2918e-02,
        -1.1167e-02, -5.4996e-03, -9.3216e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1421e-01,  7.4138e+00, -5.1281e-02, -1.0908e-02, -8.4220e-03,
         1.1893e-02, -9.4240e-03, -3.3776e-02, -5.4528e-02,  9.8132e-03,
         2.4442e-02, -2.3807e-02, -3.7143e-02,  8.2760e-03, -2.3927e-02,
         3.4868e-02,  1.9673e-03, -7.0497e-03,  8.4278e-03, -4.7845e-03,
        -1.1978e-02, -1.4359e-03, -1.2021e-02, -3.7791e-03,  2.8584e-02,
         8.9851e-03, -6.9692e-03,  7.0783e-03, -1.5912e-02, -2.9517e-02,
        -2.2799e-04,  2.9185e-03,  2.3619e-03,  5.6795e-03,  7.9391e-03,
         3.3388e-02,  1.4408e-02,  1.4960e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7553e-01,  9.4118e+00, -1.9512e-01, -1.2709e-02,  2.8083e-02,
         1.6354e-02, -2.4172e-02,  1.0223e-04, -6.8156e-02, -3.4759e-02,
        -1.0152e-02, -4.0032e-02, -1.1020e-02,  3.7628e-02, -7.7754e-02,
        -3.4747e-02, -2.1800e-02, -7.3600e-02, -6.0816e-02, -5.6618e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8531e-01,  9.8796e+00, -2.7454e-02,  6.8669e-02,  2.6358e-02,
         2.0558e-02,  1.2497e-02,  4.9341e-03,  3.1644e-02,  5.3126e-02,
         1.0791e-01, -4.1018e-02,  1.0965e-01,  3.4108e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9539e-02,  9.0279e+00, -4.7863e-02,  1.3595e-02,  5.3567e-02,
        -2.3296e-02, -2.3618e-02,  7.0223e-03,  1.7080e-02,  1.6985e-02,
        -1.3785e-02,  1.1116e-02,  1.9537e-02, -2.7330e-03, -4.4648e-03,
        -2.0911e-02, -1.5600e-03, -1.5848e-02,  1.5135e-03, -7.5373e-03,
        -2.5111e-02,  7.4716e-02, -1.1509e-02,  1.6855e-02, -2.8620e-03,
        -9.5068e-03,  1.3387e-02, -4.1066e-03,  6.7966e-03, -1.8451e-02,
         1.4901e-03,  2.3080e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1560e-01,  4.2665e+01, -3.4785e-01,  5.2191e-01, -7.7650e-01,
        -2.5884e-01, -2.6922e-01, -1.3405e-01, -2.2480e-01, -1.8914e-01,
        -2.0318e-02, -2.4194e-02,  3.2548e-02, -1.1859e-01,  7.7565e-03,
        -2.7171e-02,  2.5476e-02, -1.1610e-01, -2.4284e-01,  8.7702e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0124e-03,  1.6134e+00,  1.3242e-02, -6.4973e-03,  7.9368e-03,
        -7.1473e-03, -3.0651e-03,  3.4341e-04,  6.7360e-04, -3.0151e-03,
        -5.7993e-03,  7.6189e-04, -1.8231e-03,  3.7205e-04, -1.6251e-03,
        -4.0044e-03, -2.0764e-03,  2.1870e-04, -3.5208e-03, -3.0494e-03,
        -1.5102e-04, -9.4326e-04,  1.0517e-03, -3.6064e-03,  3.0070e-03,
        -5.3443e-03, -5.8926e-03, -5.6947e-03, -3.3440e-03, -1.9571e-03,
         1.1985e-03, -1.0220e-03, -6.0588e-04,  7.8832e-03,  2.6109e-03,
        -4.0885e-05,  2.2422e-03, -3.4057e-03,  4.2218e-03,  1.2232e-03,
         1.2688e-03,  1.4516e-03,  1.6562e-03, -3.1515e-03,  6.1470e-04,
        -6.6849e-03, -1.1300e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0480e-01,  2.0106e+01,  1.5769e-03,  1.3310e-01,  1.7410e-01,
         1.1228e-01,  9.7265e-02, -5.7449e-02,  1.3755e-01, -8.4323e-02,
        -3.4716e-03, -7.4210e-02, -6.3531e-02,  2.9151e-02,  9.8014e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7921e-01,  1.5249e+01, -8.2711e-02, -2.4045e-02, -6.9245e-02,
        -4.0968e-02, -2.6363e-03, -4.1368e-02,  8.1203e-02,  5.9412e-02,
        -3.9317e-02, -5.0100e-03,  1.1376e-03,  2.3036e-02, -4.1245e-02,
        -3.0646e-02, -3.3167e-02,  4.1925e-03, -7.3476e-03, -2.2690e-02,
         3.4724e-03, -6.3471e-02, -2.2275e-02,  1.1855e-02,  6.9811e-04,
        -3.0598e-02,  4.6704e-02,  1.9832e-01,  9.4715e-03,  3.2163e-02,
         9.3840e-05, -4.4398e-02, -1.3125e-02, -8.0527e-03, -7.4526e-04,
        -4.1666e-02,  1.4667e-03, -1.6123e-02, -6.8934e-02, -2.8802e-02,
        -1.2478e-02, -1.4166e-02, -2.3277e-02, -3.1202e-02, -2.6552e-03,
        -1.3006e-02,  1.1073e-02,  2.1188e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.2949e-01,  6.6952e+01, -2.6660e-01, -2.8759e-01,  2.6797e-01,
        -7.3627e-01,  7.2013e-02, -4.0739e-02, -2.0968e-01, -8.8152e-01,
         5.1287e-02, -2.8062e-01, -2.8974e-01,  8.4853e-03, -6.8887e-02,
        -2.2031e-01, -7.3234e-01, -3.7935e-01, -1.0050e-01,  1.1528e-01,
        -8.5458e-03,  7.8843e-03,  3.5608e-02, -2.8292e-01, -1.6099e-01,
         3.4870e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9646e-01,  4.7476e+01, -1.1615e-02, -4.5831e-01, -2.6543e-01,
        -1.9151e-01,  8.1401e-02,  9.5153e-02, -3.7341e-01, -1.2309e-01,
         1.5771e-01,  9.4374e-02, -1.4305e-02, -6.9601e-02,  5.1561e-02,
        -3.8428e-01,  1.0972e-01, -9.2409e-02,  1.1013e-01,  2.9823e-02,
        -7.0379e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8440e-03,  2.6957e-01, -9.6789e-04, -5.4831e-03, -1.7921e-03,
        -1.4372e-03,  2.3316e-04, -3.1828e-04, -5.0433e-04, -3.8706e-04,
         1.0378e-04, -4.5018e-04, -5.7323e-04, -7.6892e-04,  2.4304e-03,
        -5.1380e-04,  8.6910e-04, -6.1091e-04,  4.1360e-04, -1.0972e-03,
         1.2594e-04, -5.5769e-04, -9.2655e-04, -2.8443e-05,  2.0449e-03,
        -2.6915e-04, -3.5034e-04, -6.7231e-05, -2.4560e-04,  5.8811e-04,
        -1.2167e-03, -1.1858e-03,  9.5681e-04,  7.6464e-05, -7.4772e-04,
        -3.3068e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0644e-01,  2.6287e+01,  7.7329e-02,  2.3519e-01, -9.3088e-02,
         1.0811e-01, -1.5026e-01,  1.4941e-02,  1.5424e-02,  8.6352e-02,
         2.2769e-01, -9.4249e-02, -6.9713e-02, -1.4264e-02, -1.4292e-02,
         2.0618e-01,  5.8943e-02,  1.4078e-03,  3.0503e-01, -1.5515e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3929e-02,  1.0786e+01, -7.0837e-02,  2.9731e-02,  2.2350e-02,
        -3.6351e-02, -1.9751e-02, -3.9765e-02,  1.0037e-01, -3.5099e-02,
         1.0304e-02, -3.2254e-02, -2.4759e-02,  1.2467e-02,  4.8609e-02,
        -2.3504e-02,  7.9664e-04, -5.2363e-02, -2.5095e-02,  1.0671e-02,
        -7.2698e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0677e+00,  6.3503e+01, -3.7213e-02, -3.4964e-01, -2.4432e-01,
        -6.1997e-01, -2.3986e-01, -3.7078e-01, -4.4202e-01, -2.7046e-01,
        -1.9222e-01,  1.7399e-02, -6.8768e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4363e-02,  7.7969e-01,  6.2859e-03,  3.3317e-04, -1.6978e-04,
        -2.9922e-03,  7.5508e-03, -3.3926e-03, -4.4293e-03, -3.9420e-04,
        -6.7070e-03, -4.8862e-03, -3.9567e-03,  8.6032e-04,  4.1973e-03,
        -2.1397e-03, -1.8568e-03,  1.3154e-03, -3.0299e-03,  2.1405e-04,
         9.2191e-04,  6.1111e-03, -2.3481e-03, -2.3389e-03, -1.2848e-04,
        -9.4457e-04, -1.4071e-03, -1.6190e-03, -5.4419e-04,  4.2190e-04,
         4.0352e-04,  1.1437e-03,  2.2817e-04, -2.5069e-03,  8.8770e-04,
         3.1310e-03,  4.0278e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8286e-01,  1.8824e+01, -3.5832e-02, -8.2428e-02, -3.5718e-02,
        -5.9891e-02,  4.1161e-02,  7.5760e-02, -2.8249e-02, -1.3748e-01,
        -1.8663e-02,  6.8960e-02, -4.1140e-02,  5.5626e-02,  2.1902e-02,
        -3.6911e-02,  6.6194e-02,  3.8016e-02,  1.2861e-02,  2.0566e-02,
        -4.6261e-03,  1.0165e-03,  1.9762e-01,  1.1723e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9219e+00,  1.3753e+02,  1.5493e+00,  6.6896e-01, -1.7659e-01,
        -4.0685e-01,  1.4336e-02,  5.6320e-01, -5.9770e-02, -6.1362e-01,
        -7.9578e-01,  4.6864e-01,  7.3137e-02,  1.5183e-01, -9.3997e-02,
        -1.8984e-01, -1.8847e-01,  4.8531e-02, -2.6556e-02, -4.8492e-02,
         9.7579e-02, -2.9231e-01,  9.7133e-02, -1.5275e-02, -2.5593e-01,
        -6.9948e-02,  1.3552e-02, -1.7426e-02, -2.1960e-01,  1.0854e-01,
        -5.6684e-01,  1.7388e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3150e-02,  3.0816e+00,  6.7164e-03, -5.4847e-02,  5.5340e-04,
        -3.1902e-02, -8.5496e-03, -1.4642e-02,  2.5670e-04,  1.8585e-02,
         1.1881e-03,  1.6808e-03, -3.1263e-03, -3.5904e-03, -1.4432e-03,
         4.4790e-03, -7.3980e-05, -1.2362e-02,  7.0628e-03, -2.8654e-04,
         7.4129e-03, -2.9662e-02, -3.3976e-03,  8.1215e-03, -4.4465e-03,
         3.5740e-03,  5.6554e-03, -1.2662e-03,  5.3238e-03, -7.4673e-03,
        -2.2412e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3988e-01,  1.5852e+01,  1.9995e-01,  9.6760e-02,  7.8178e-02,
         7.1749e-02,  4.5402e-03, -2.0167e-01, -5.4176e-02,  3.0635e-02,
         6.4687e-02,  2.9687e-02,  2.6875e-02, -1.9405e-02,  3.3613e-02,
         4.4821e-02,  7.1921e-02,  6.0265e-02,  6.3588e-02,  1.0065e-01,
         3.8200e-02,  4.3676e-03, -1.8312e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0248e-02,  1.0867e+01,  1.4536e-02, -6.7806e-02, -5.5518e-02,
         3.4996e-02, -4.8308e-02, -1.1065e-02, -7.0543e-03,  8.2313e-03,
         2.1060e-02, -4.7511e-02,  1.9310e-03, -9.5581e-03,  1.2170e-02,
         1.0755e-02,  3.9315e-02, -1.9984e-02, -2.4611e-02,  1.1971e-02,
        -3.0379e-02,  1.2618e-02,  1.6871e-02,  2.3712e-02,  4.3512e-03,
         2.0738e-02, -8.2121e-03, -1.9319e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 3.0225e-02,  4.7212e+00, -5.1405e-02,  2.0196e-03, -1.4874e-02,
        -5.5093e-03,  8.9718e-03, -1.6519e-02, -1.0409e-02,  6.0544e-03,
         1.4284e-02,  5.1166e-03,  1.2482e-02, -9.0525e-03, -7.4194e-04,
        -4.0446e-04,  6.6980e-04,  5.4397e-03, -6.9654e-04, -7.6486e-03,
         1.8411e-03,  4.6550e-04, -3.8487e-03, -4.8986e-03,  5.7461e-03,
        -1.3651e-03, -6.4839e-03, -4.2003e-02, -5.0043e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.2824e-04,  4.4455e-01,  9.6355e-03, -1.7689e-03, -2.0564e-03,
         1.0985e-03,  9.0891e-04, -4.2789e-04, -6.9466e-04,  3.7142e-04,
         6.1455e-04, -1.0729e-03, -8.8930e-04,  1.1092e-03,  1.1826e-03,
        -8.3368e-04,  1.8091e-03,  5.0899e-04,  1.3527e-03,  4.4977e-05,
         5.4536e-04,  1.2978e-03, -1.3941e-04, -1.8693e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4153e-02,  6.0839e+00,  3.3004e-02, -2.1539e-02, -2.8762e-02,
         3.4660e-02,  1.5036e-03, -9.4197e-03, -5.6746e-03,  6.9981e-03,
         2.3705e-03, -9.9150e-03, -8.0857e-03, -5.8360e-03, -2.0378e-03,
         5.7239e-03, -1.2562e-02,  7.2657e-03, -2.2260e-02, -3.7304e-03,
         8.0855e-04,  3.9287e-03,  1.0822e-02, -6.9272e-05,  1.3097e-02,
        -1.9045e-03, -1.1958e-02,  3.5488e-03, -1.0402e-02, -3.5454e-03,
        -5.0396e-03,  1.9331e-03, -5.6892e-03, -8.3119e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8802e-01,  9.7061e+01, -3.5695e-01, -2.3562e-01, -2.1989e-01,
        -4.6136e-01,  6.2640e-03, -4.3501e-02,  1.6724e-02,  9.9831e-03,
         7.6818e-02, -1.9391e-01, -6.3308e-01, -2.1793e-01, -5.2443e-01,
        -3.4040e-01, -2.3959e-01,  1.9672e-01, -9.4325e-02,  6.7943e-02,
         8.3184e-02,  2.9091e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3928e-03,  1.2176e+01,  8.2244e-03,  5.9738e-02, -1.3554e-02,
        -4.3689e-02,  2.5086e-02, -1.1004e-02,  2.6383e-02,  8.2089e-02,
         1.1311e-01,  2.8670e-02, -1.3785e-02, -9.8412e-03,  2.0365e-02,
        -3.9469e-02, -6.0298e-02, -2.2693e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1111e-02,  3.9179e+00, -3.3201e-02,  4.0906e-03, -3.0345e-03,
         1.1091e-02,  1.1003e-03,  9.2122e-03, -4.7905e-04,  8.4990e-03,
         5.2291e-03, -1.2855e-03, -2.6402e-03,  6.3317e-03, -1.2305e-02,
         3.2279e-04,  2.7032e-03, -1.1286e-02, -4.1057e-03, -1.6369e-02,
         6.6806e-03, -3.4438e-03,  7.4451e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3312e-01,  6.1223e+01,  2.4839e-01,  9.4690e-02, -8.0484e-01,
         7.8824e-02,  2.4517e-02,  1.4392e-02, -4.3901e-01,  7.0853e-02,
         4.7547e-02,  1.6147e-01, -8.1148e-02, -1.2781e-01, -1.5523e-01,
         2.6187e-01, -3.5108e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4096e+00, -1.9667e+02, -4.7300e+00, -7.7666e-01,  4.0413e-01,
         2.8186e-01,  6.6745e-01,  2.6812e-02,  2.1182e-02,  3.8321e-02,
        -2.7768e-01,  2.0933e-01,  7.2627e-01, -1.1095e-01,  1.0192e-01,
        -9.3944e-01, -1.1454e+00,  1.5583e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4242e-02,  6.3520e+00,  3.4463e-02,  7.5207e-03,  2.0899e-02,
         3.8028e-02, -1.8958e-02, -2.1127e-02, -1.7514e-02,  4.5841e-02,
        -2.8631e-02, -2.8221e-02, -7.1211e-02, -2.0496e-02, -1.1142e-02,
        -1.5900e-02,  1.2528e-02, -2.1346e-02,  2.7263e-02, -3.0230e-03,
         5.0620e-03,  2.4759e-03,  1.1812e-02, -2.4645e-03, -3.9036e-02,
        -6.7019e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1144e-01,  5.3431e+00, -3.0215e-02, -1.5303e-03,  1.8929e-02,
         2.9269e-02,  8.3294e-03,  3.4459e-02,  5.0702e-02, -3.2763e-03,
         4.3628e-03,  2.4121e-03,  4.1552e-04, -1.0300e-02, -4.6656e-02,
         7.6491e-03,  2.5097e-02,  1.4654e-02, -6.3726e-04, -2.3644e-03,
         1.9667e-03,  4.8000e-03,  1.2354e-03,  4.1071e-02, -8.5145e-03,
         1.8279e-02,  2.5731e-03,  1.7016e-02,  3.8828e-02,  1.9839e-02,
        -1.3867e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3547e-03,  2.4543e-01,  2.1196e-03,  1.8393e-03, -2.4395e-03,
         7.3634e-04, -5.9112e-04,  1.1479e-03, -8.4689e-04,  2.8710e-04,
         5.9447e-04, -1.8946e-04, -1.6267e-03,  1.1984e-04, -1.9160e-04,
        -1.8457e-04, -3.5853e-03,  6.6152e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0006,  0.2466, -0.0025, -0.0016,  0.0010, -0.0025, -0.0008,  0.0015,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.6479e-02,  7.2550e+00,  3.7853e-03, -2.6854e-02,  1.7471e-02,
         3.6746e-02, -2.3130e-02, -2.3913e-03,  1.2086e-02,  1.2589e-02,
        -1.2938e-02,  1.4718e-02,  1.5457e-02, -7.2212e-03, -7.9880e-04,
         1.2174e-02,  2.1927e-02, -1.8742e-02,  1.7723e-02,  2.8157e-03,
        -7.3879e-03, -6.4923e-04, -9.0675e-03, -2.3187e-03,  1.5194e-02,
        -1.9856e-04,  5.9450e-03,  4.9949e-03,  2.4844e-03, -5.6059e-03,
         4.6442e-03,  6.2719e-03, -3.7869e-03,  3.0899e-04,  1.4142e-02,
        -4.2475e-03, -1.4616e-02,  1.2313e-02,  1.0044e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6880e-01,  4.1111e+01, -8.2893e-01, -2.0679e-01, -7.6899e-02,
         4.0050e-02, -4.5306e-02, -8.2801e-02, -8.6234e-02,  6.4538e-03,
        -2.7898e-02,  1.3116e-03,  2.6821e-01, -1.5674e-01, -5.6674e-02,
        -2.2452e-02,  2.1861e-02, -3.7789e-02, -3.3948e-02,  9.2665e-02,
        -7.5459e-02, -7.2903e-02,  3.0883e-02, -7.0143e-02, -4.7983e-02,
        -8.3553e-02, -2.0628e-01,  3.2877e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1376e-01,  1.3490e+02, -4.9072e-01,  5.6583e-01, -4.4749e-01,
        -2.7888e-01,  2.3191e-02,  2.6225e-01,  2.1549e-01,  5.6238e-01,
         5.5686e-01, -4.2886e-01,  3.7503e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5406e-01,  1.9540e+02,  8.9225e-01, -1.3710e+00, -1.3604e+00,
         7.6354e-01,  6.9644e-01, -1.6503e-01, -9.6835e-01,  8.2699e-01,
        -5.4341e-01, -1.0787e+00, -6.5168e-01, -2.6935e-01, -3.7678e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1220e-01,  3.3074e+01,  2.7380e-01, -4.3468e-03,  1.0917e-01,
         2.9131e-01, -3.0487e-03, -1.0859e-02,  1.1995e-02,  7.1947e-03,
         3.9379e-02,  1.9541e-03, -2.0568e-01,  2.3215e-01,  8.5387e-02,
         2.0368e-02,  4.9538e-02, -1.9614e-02, -4.3905e-02, -4.4589e-02,
         1.2542e-01,  1.7045e-02,  8.9364e-02, -2.6288e-02, -3.7597e-02,
        -8.8665e-04,  1.9409e-02,  5.1596e-02, -6.8795e-02,  2.2782e-02,
         3.8955e-02,  1.3877e-02,  2.0535e-02,  1.1513e-01,  4.6132e-02,
         3.2543e-02, -6.2307e-02, -1.5536e-02, -2.4740e-02,  5.8339e-02,
         1.0746e-01,  1.9530e-02, -7.2589e-03, -2.4041e-02, -4.8825e-02,
        -3.2084e-03,  1.2665e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6610e-01,  2.0394e+01,  1.7457e-01,  1.2014e-01,  8.9102e-02,
        -3.7276e-02, -2.7864e-03,  8.7389e-02, -9.2133e-02,  2.6424e-02,
         3.4509e-03,  3.2044e-02, -7.1163e-03, -1.3096e-02,  2.2625e-02,
        -1.4336e-02,  4.9362e-03, -5.5101e-02, -1.3817e-02, -2.3350e-02,
        -1.6031e-02,  1.2489e-02, -2.6279e-03, -1.8997e-02,  1.0009e-02,
         3.6213e-02,  3.0270e-01,  2.9970e-02,  2.2343e-02, -2.1420e-02,
         4.5830e-03,  1.5133e-02,  7.5195e-02,  1.3078e-03, -3.3006e-02,
        -6.8954e-03,  2.9925e-02,  1.1449e-02,  1.4157e-02,  8.1798e-02,
        -1.6193e-03, -4.0017e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9097e+00, -6.6358e+01,  1.0093e+00,  3.5529e-01, -2.6654e-02,
        -1.6719e-01, -1.1583e+00,  8.2457e-02,  1.2475e-01, -1.1552e-01,
         1.7492e-01,  1.3862e-01,  7.9817e-02,  1.4978e-02, -6.9045e-01,
         9.0486e-02, -2.5213e-01, -3.1220e-02,  2.5275e-02,  1.4293e-02,
         3.3542e-01, -1.5648e-02,  7.4161e-01, -3.1733e-02,  7.4504e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5010e-02,  6.8427e+00, -1.4007e-02, -5.8377e-03,  1.7573e-02,
        -1.8307e-02,  6.3710e-02, -6.1149e-04,  2.2830e-02,  7.5318e-05,
         1.0476e-02, -1.8392e-02,  4.9336e-03, -1.8023e-02,  2.9754e-02,
         2.5683e-02,  1.8398e-02,  5.5550e-02, -2.1148e-02,  5.0354e-03,
         1.9477e-03,  9.9119e-03,  1.2805e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3738e-02, -1.2611e+02,  7.5857e-01, -1.8202e-01, -8.2001e-01,
        -3.4566e-01,  2.3680e-01, -1.8205e-01,  4.5733e-01, -1.8628e-01,
        -5.7964e-01, -3.3959e-01, -1.0465e+00,  3.0262e-01,  4.1772e-01,
        -9.2658e-03, -1.3558e-01, -6.7022e-01,  3.6245e-01,  9.7645e-01,
         1.3141e-01, -3.0357e-01, -1.3211e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4965e-02,  6.4905e+00,  8.7408e-02, -2.5746e-02, -3.0896e-02,
         5.5304e-03,  1.5315e-02,  6.0149e-02,  5.3735e-02,  3.1256e-02,
        -1.2272e-01, -1.0435e-01,  7.8454e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0114e+00, -2.2210e+02, -2.0914e+00,  7.0081e-01, -6.0440e-01,
        -2.2631e-02,  6.7284e-01,  8.9727e-01, -8.6287e-01, -7.1615e-01,
         8.9322e-01, -1.2297e-01, -5.0982e-01,  3.3217e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8157e+00,  1.8529e+02,  1.0156e+00, -3.4525e-01, -2.4319e-01,
        -1.6928e-01,  1.7222e-01,  1.2656e-01, -8.2295e-02,  7.5811e-01,
        -5.1103e-01, -6.1196e-01, -8.9912e-01, -7.8680e-02, -1.2333e-01,
         5.2651e-01, -2.0826e-01, -7.2130e-01,  6.6422e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.4806e-02,  4.8624e+00,  6.4202e-03, -2.1861e-02,  1.8655e-02,
         2.0964e-02,  6.6768e-03,  5.3585e-03, -2.1028e-03,  6.6636e-04,
         1.3109e-02,  6.9975e-03, -1.9273e-04,  1.7846e-02, -4.9893e-03,
         5.7655e-04, -6.8147e-03, -4.5397e-03,  2.3873e-02,  2.2694e-02,
        -9.3538e-03, -4.1631e-03, -1.2799e-02,  7.7731e-03,  3.9869e-04,
        -3.2522e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7441e-03,  4.3681e+00,  7.7420e-02,  8.4572e-03, -7.3897e-03,
        -2.5973e-02, -2.8965e-02,  2.4857e-03,  1.2661e-03, -7.9532e-02,
         1.8291e-02, -1.2761e-02,  1.0615e-02,  2.3618e-02, -2.1196e-03,
        -2.0590e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0406e-02,  3.3917e+00, -4.3399e-02,  1.3051e-01,  4.8762e-04,
         1.4541e-02,  2.0659e-02,  1.1788e-02,  4.0184e-03, -1.6139e-03,
        -1.4634e-03, -3.4304e-02, -6.6400e-03,  1.3610e-02,  2.3586e-02,
         1.4548e-02,  1.0391e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0436, 23.2846,  0.5319,  0.1264,  0.2385,  0.0320,  0.0582,  0.2307,
        -0.2434,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1249e-02,  1.1693e+00, -1.1548e-02,  8.2675e-03, -3.4728e-03,
        -1.4467e-03,  1.5638e-03, -3.4768e-03,  1.1028e-04, -6.8345e-04,
         9.7946e-04, -4.8023e-03,  1.3214e-03,  2.1840e-03, -3.2554e-03,
         2.6054e-03,  2.7307e-03,  1.1179e-03,  2.1075e-03,  1.8361e-04,
        -6.6013e-04,  2.6657e-03,  2.0951e-03,  9.4829e-04, -2.9082e-03,
        -1.8122e-03,  5.8408e-03, -1.4585e-03,  1.9044e-03,  1.2962e-03,
         1.7928e-03,  9.1469e-04,  1.7088e-03, -1.1677e-03,  2.3363e-03,
         1.4814e-03, -7.4607e-03, -4.9700e-04, -2.8913e-03, -9.0150e-04,
        -4.9348e-03, -5.0079e-03, -3.0761e-03, -1.2230e-03, -4.5085e-04,
        -4.3828e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0450e-02,  4.0918e-01, -2.1214e-03, -1.7523e-03, -8.4681e-04,
        -4.8783e-04,  1.1709e-03,  3.2566e-04, -4.0791e-04, -1.0286e-03,
        -1.5750e-03,  1.5366e-03,  2.2703e-04, -3.1623e-05, -8.3869e-04,
         1.0014e-03, -4.6635e-04,  7.6750e-05, -7.4849e-04,  4.6926e-04,
         3.8513e-04, -3.6210e-04, -3.1320e-03, -4.7800e-05, -1.8928e-04,
        -7.1698e-04, -1.4061e-03, -1.0756e-03, -1.4949e-03,  7.7736e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4769e-01, -5.1497e+00,  1.9269e-02, -1.7112e-04,  5.3832e-02,
         1.8653e-02,  1.5487e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2180e+00, -2.4687e+02, -7.7723e-01, -1.5331e-02,  3.2527e-01,
        -3.9075e-01, -6.1120e-01, -5.0117e-01,  3.2552e-01,  1.0285e+00,
        -6.7926e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1237e-01,  1.2688e+02,  1.0770e+00,  1.2956e+00,  1.0629e-01,
         3.1434e-01, -7.9419e-01, -8.3180e-01,  1.1629e+00,  8.4783e-02,
        -5.7517e-01,  4.8542e-01,  2.1849e+00, -2.5573e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2819e-01,  1.6509e+01, -2.7946e-03,  1.2077e-02,  4.9774e-02,
        -9.4740e-03,  3.1900e-02,  6.2075e-02,  5.0388e-02,  4.6481e-02,
         3.7271e-02, -2.7493e-02, -3.3824e-02, -1.0324e-01,  1.6975e-03,
         1.8822e-02, -5.7608e-04,  1.5955e-02,  1.6568e-02,  6.9082e-02,
        -1.4450e-01, -2.0729e-03, -6.2600e-02, -1.3982e-02, -3.4252e-02,
        -2.3343e-02, -4.6469e-04, -1.9313e-03, -1.4176e-02,  8.7426e-02,
        -3.0991e-02,  3.5431e-02,  8.7004e-03,  6.4941e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5700e-02,  2.2163e+00, -3.2454e-02,  2.8589e-02, -1.3301e-05,
         1.5442e-03, -7.7053e-03, -1.4000e-02, -2.3909e-03,  5.8837e-04,
        -4.2904e-03,  5.6248e-04, -8.4090e-03, -2.9042e-03,  1.6739e-02,
        -7.6453e-03, -1.1814e-02, -3.3073e-03, -4.3030e-03, -1.9439e-03,
        -1.5230e-03, -9.6170e-04, -8.8436e-03, -5.5011e-03,  8.4978e-04,
        -4.6892e-03, -6.5352e-03, -3.6752e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4668e-02,  2.3448e+01, -2.3600e-01,  4.7152e-02,  4.4996e-02,
         8.5515e-02, -7.6266e-02,  6.1811e-02, -5.1565e-02,  2.7147e-02,
        -8.1570e-03, -6.6386e-02,  1.5922e-01,  1.3597e-01,  1.0841e-01,
        -2.7224e-02, -1.1799e-01,  1.1172e-01,  6.2872e-02, -3.5060e-03,
        -1.8566e-03,  3.1215e-02, -1.3783e-01,  2.0890e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-8.7312e-01, -9.0859e+01, -4.8661e-01,  9.8753e-02, -4.4661e-03,
        -3.2213e-01,  9.9774e-04,  4.2934e-01, -4.2940e-01,  8.3612e-02,
         1.2542e+00,  4.4851e-01, -2.7646e-02, -3.3231e-01, -1.1294e-03,
        -3.6929e-01,  8.1526e-01,  1.1699e-01, -2.7316e-02, -2.7756e-01,
        -4.2574e-02,  3.4935e-02, -1.2253e-01, -1.3892e-01, -7.7191e-03,
        -2.7729e-01,  4.8705e-01,  1.4889e-01,  2.7384e-01,  2.3260e-01,
        -4.4896e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0892,  3.9853,  0.0171,  0.0209,  0.0078, -0.0156, -0.0211, -0.0050,
         0.0097,  0.0083,  0.0123,  0.0189,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9801e-02,  2.7798e+00, -6.3595e-03, -3.0098e-02,  2.9882e-02,
        -1.0244e-02, -9.7573e-03, -5.1184e-03, -8.3636e-03,  7.0216e-04,
        -6.2135e-03,  1.0383e-02, -4.3073e-03, -4.7437e-03, -3.1490e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3402e-01,  1.0976e+02,  1.8972e+00, -5.6561e-02, -1.1396e-01,
         6.2290e-02,  1.8594e-01, -1.0146e-01,  2.1822e-02, -3.8063e-02,
        -7.7677e-01,  7.1883e-03, -9.6211e-01,  3.8319e-02,  7.3804e-02,
        -7.6492e-02, -1.7639e-01, -2.7714e-01,  4.8319e-01, -1.9582e-01,
        -1.2216e-02, -4.8431e-02, -5.1404e-01,  5.4760e-02,  1.2531e+00,
         1.4851e-01, -4.5939e-02, -2.7926e-01, -3.5016e-01,  1.7182e-01,
         1.1231e-01,  1.0113e+00,  4.3365e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8285e-01,  3.1853e+01,  1.2470e-02, -1.7979e-01,  5.4127e-02,
        -3.2203e-02, -2.5953e-04, -3.1010e-02,  5.7734e-02, -1.6670e-01,
        -1.0915e-01, -1.6097e-01, -1.2315e-01, -4.4588e-02, -2.7347e-01,
         3.3081e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9289e-03,  1.3487e+00, -5.7238e-03,  1.1999e-02,  1.1433e-03,
         4.7330e-03, -6.4718e-03, -5.5416e-03, -1.3516e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2132, 11.9593,  0.0335,  0.1781,  0.7129,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3144e-01, -1.2253e+02,  2.4535e+00,  4.8815e-02,  4.1081e-01,
         4.7872e-01,  3.6290e-01, -2.5076e-01,  3.4522e-02, -4.8114e-01,
        -5.4920e-01, -1.0105e-01,  1.4178e-01, -3.4235e-01,  1.1817e-02,
        -8.4648e-02,  2.5430e-02,  2.1143e-04, -2.1050e-01, -2.0397e-01,
        -1.7113e-02, -4.2169e-01,  1.3640e-01, -3.9208e-01, -1.1788e-01,
        -1.9379e-01, -2.3949e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([   0.6455, -116.7136,    0.6388,    0.9113,    1.6919,   -0.5188,
          -2.8875,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0442e+00, -1.3083e+02, -1.1655e+00, -6.2305e-01, -3.7300e-01,
        -4.6365e-01,  5.7531e-01, -4.4798e-01,  4.1774e-01,  1.4925e-01,
         1.1331e-01,  3.9384e-01, -5.8982e-01,  4.1309e-01, -5.0014e-01,
        -4.0663e-01, -1.1333e-01,  5.0378e-01, -4.9095e-01, -2.9007e-01,
         3.6630e-01, -1.2315e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0495e-02,  3.0267e+00,  1.0834e-02,  1.1546e-02,  1.4325e-02,
         1.3166e-02, -3.1095e-03,  1.1519e-03, -1.0095e-04, -3.5156e-02,
         1.5321e-02,  5.7319e-03, -2.2598e-02,  1.7296e-03, -6.5054e-03,
        -2.0031e-03, -3.3710e-03, -2.2224e-02, -1.4933e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1729e-01,  3.8424e+01,  3.8725e-01,  1.5995e-01,  1.4344e-01,
         1.2391e-02, -1.8822e-01,  1.3775e-01,  1.4550e-01,  5.5250e-02,
        -2.9446e-02,  2.7089e-02, -1.6559e-02,  1.2208e-02,  1.7083e-01,
         3.8757e-02, -1.3561e-01,  6.6965e-02, -4.8735e-02, -3.6221e-02,
         7.4190e-02,  4.0273e-02,  7.1293e-02, -1.5277e-02, -1.2045e-01,
         3.2857e-02,  2.6040e-02, -2.9806e-03, -6.6731e-02, -2.2013e-02,
         2.9030e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.0102e-01, -1.3898e+01, -3.7987e-01, -1.3107e-01,  6.4053e-02,
         8.6551e-03, -3.1592e-02, -3.0982e-02,  4.4644e-02,  9.0045e-03,
        -1.8085e-02, -1.5440e-02,  7.1819e-02, -1.5099e-01, -4.3306e-02,
         5.3816e-02,  1.1282e-01, -1.2405e-02, -3.2007e-02, -9.6318e-03,
        -9.6035e-03,  7.2081e-03,  4.0243e-03,  1.5443e-03,  1.6410e-02,
         1.0602e-02, -3.7681e-03,  3.7765e-02,  5.2842e-03,  3.1451e-02,
        -1.2382e-02, -1.6824e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6086e+00,  1.7753e+02, -4.9938e-01, -4.7109e-01,  2.3713e-02,
         1.7271e-01, -6.3107e-01,  7.9981e-01,  1.6252e+00,  1.4699e-01,
         7.5786e-03,  7.2103e-01, -1.8540e-01, -5.7472e-01,  5.3244e-01,
         5.0304e-01,  1.1834e+00, -4.8794e-03, -6.6677e-01,  8.5305e-03,
         4.4996e-02,  5.4256e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5471e-01,  8.9629e+01, -6.6125e-01, -1.3530e-01, -6.2605e-03,
         3.0052e-01,  1.7634e-01,  2.0137e-02, -1.1969e-01,  1.4815e-01,
         2.0157e-01,  2.9380e-02, -1.7778e-01,  6.2844e-02,  1.2593e-01,
         2.6197e-01, -1.8619e-01,  1.8176e+00, -3.9610e-01,  4.3201e-01,
        -1.1501e-02,  2.5018e-02, -7.8157e-03, -1.8765e-01, -7.9282e-02,
         2.2125e-02, -1.0533e-01, -5.0183e-01, -3.5707e-02,  1.1409e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2354e+00,  9.1066e+01, -1.0360e+00,  1.0185e-01,  1.2936e-02,
        -5.8888e-01, -3.5082e-01, -1.2088e-01, -2.1255e-01, -6.5024e-02,
         1.8358e-01,  4.8855e-01,  5.0317e-02, -1.8551e-02, -8.5395e-02,
         1.7977e-01,  1.2653e-02, -3.4468e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5118e-02,  7.4426e+01, -1.3130e-01,  1.7681e-02, -4.5436e-01,
        -1.4567e-01, -6.7709e-02,  1.0600e-01,  3.9518e-02,  2.3888e-01,
         1.2099e-01,  4.6561e-02,  3.1379e-02, -6.7586e-02,  5.6886e-02,
        -2.6770e-01,  6.8297e-02,  3.0345e-02, -4.5655e-01,  2.5126e-02,
        -1.4478e-01,  9.9512e-03,  2.9821e-01,  1.8370e-01,  5.7949e-02,
        -9.0729e-02, -3.9577e-01, -7.2249e-02, -1.3839e-01, -3.0819e-02,
         4.4995e-02, -1.5282e-01, -4.4000e-02, -3.2178e-01, -1.0465e-01,
        -1.9828e-01,  9.6220e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5538e+00, -2.1019e+02,  1.3321e+00, -5.0871e-01, -1.6378e-01,
        -7.1130e-02, -4.5069e-01,  8.7382e-01,  2.0378e-02, -4.7466e-01,
        -5.0141e-01,  5.0528e-01, -2.2290e-01, -2.3185e-01, -5.5745e-01,
        -1.1538e-01,  2.7392e-01, -2.9015e-01,  7.9323e-03, -5.3531e-01,
         3.7012e-01, -2.2338e-01, -8.2182e-01,  4.7704e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4589e-02,  1.3883e+01, -1.4595e-01, -8.7840e-02, -4.0255e-02,
        -1.0266e-02, -3.6065e-03, -2.5497e-02, -1.4090e-02, -2.9414e-02,
        -8.9590e-03, -1.4029e-02, -1.0861e-01, -5.7741e-03,  2.0917e-02,
         1.6781e-03, -4.0247e-02, -3.2693e-02, -1.7398e-02, -8.9517e-03,
         2.1041e-03, -3.4226e-02,  7.3652e-03, -2.2376e-02,  1.4678e-03,
        -1.6393e-02, -1.7670e-02, -7.9380e-03,  3.2114e-02, -6.8581e-03,
         1.2569e-03, -1.6175e-02,  8.1215e-03,  1.1840e-02, -8.1733e-03,
        -1.7724e-02, -1.7664e-02, -4.6578e-03,  5.6192e-03, -1.7403e-02,
         1.7657e-02, -2.8815e-02,  1.6286e-02, -2.1414e-02,  1.1105e-02,
         2.9433e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2301e-03,  6.1521e-01, -3.4992e-03, -5.9728e-03, -2.8667e-03,
         1.6904e-03, -7.6139e-03, -7.4476e-04, -2.0121e-03, -1.1789e-03,
        -1.4846e-04,  2.5162e-04,  4.5631e-04, -1.1525e-03,  3.0595e-04,
         1.3941e-03, -1.8014e-03,  1.4226e-03,  2.1617e-03, -1.8059e-03,
        -6.9124e-04,  2.1279e-04,  3.0918e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8390e-01,  4.7736e+01,  3.9556e-01, -1.9283e-01,  4.3908e-01,
         1.6187e-01,  6.3789e-02,  6.3688e-02,  2.9879e-01, -2.2301e-01,
         1.8922e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8979e+00,  5.8101e+01, -2.4321e-01,  1.0661e-01,  5.1674e-01,
         1.7723e-01,  4.1134e-02, -4.0621e-01,  4.0948e-01,  7.5151e-02,
        -1.2213e-01,  5.9585e-02, -2.0357e-01, -6.6616e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5099e-01,  6.5580e+01,  5.7753e-01,  4.3317e-01, -8.1479e-01,
         2.0790e-01, -6.5907e-01,  2.3726e-01,  1.5355e-03, -7.0787e-01,
        -2.5787e-01, -3.3890e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7808e-02,  1.4642e+01,  3.0057e-02,  5.3666e-02, -4.0870e-02,
         2.0576e-02, -8.0241e-03,  1.0427e-02,  1.1795e-02,  5.4436e-02,
        -1.4758e-02, -3.3025e-02, -1.2014e-02,  3.4665e-02, -1.6182e-02,
        -2.6244e-04, -2.7168e-03,  3.3411e-02,  1.5025e-02, -6.5361e-02,
         1.2391e-02,  2.1640e-02,  4.1992e-02, -3.2446e-02,  7.5012e-03,
         1.8634e-02,  9.8923e-03,  3.1308e-03, -1.9551e-02,  2.2885e-02,
         4.4448e-02,  5.0366e-02,  2.6863e-02,  3.6464e-02,  3.3094e-02,
        -2.2898e-02,  7.1009e-03,  1.3935e-02,  3.4014e-02, -2.4346e-02,
         7.7716e-03,  8.5842e-03, -1.9367e-02,  5.1829e-03, -1.4205e-02,
        -2.9054e-03,  8.6216e-03, -5.4320e-03, -3.9703e-04, -5.8607e-03,
        -4.7205e-03,  1.8320e-02, -1.8476e-02, -1.1780e-02,  4.1947e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.1840e+00,  7.9617e+01, -1.9015e-01, -4.4294e-01,  1.1785e-01,
         4.1093e-01, -2.8117e-01,  1.8880e-02, -4.0762e-01, -1.1076e-01,
        -6.4171e-01,  1.4342e-01,  2.0766e-01,  9.1820e-02, -8.3809e-02,
         7.3904e-02, -1.2698e-01,  7.1609e-02,  2.3155e-01, -7.0744e-02,
        -3.9054e-02,  4.4289e-02, -1.5450e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6057e+00, -1.6007e+02, -2.0631e+00,  1.1947e-01,  2.4753e-02,
         1.8991e-02, -5.5789e-02,  1.9130e-01, -6.4743e-02,  4.9725e-01,
        -9.6958e-02,  5.1370e-01,  2.7819e-01, -2.5444e-01,  5.9159e-01,
         2.2998e-01,  1.0223e+00,  2.6502e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9685e-01,  1.4223e+01, -1.1956e-01, -7.0599e-02,  1.1890e-01,
        -1.7110e-02,  2.8539e-01,  6.5895e-02,  2.4263e-02, -8.9774e-02,
        -1.0371e-01, -5.1469e-02,  2.1753e-01, -3.4717e-02, -6.7218e-02,
         6.9588e-04, -8.2494e-03,  8.2178e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5533e-02,  1.1662e+01,  1.8056e-02, -2.2156e-02, -1.0851e-02,
         1.2550e-01,  1.1548e-01, -4.5070e-02,  5.2310e-02, -3.9116e-04,
         1.7586e-02,  4.6561e-02,  3.8027e-02,  7.8150e-02, -3.5041e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3333e-02,  3.9887e+00,  3.7825e-02, -1.3521e-02, -1.4001e-03,
        -1.2519e-02,  1.8444e-03,  4.1355e-03, -8.3202e-03, -8.7548e-03,
         8.0437e-03, -1.7804e-02, -1.1195e-03,  2.6390e-04, -1.5614e-02,
         2.9446e-02, -4.0711e-02, -1.0372e-02, -5.9831e-03,  2.4044e-02,
         6.0214e-03,  4.7443e-03, -3.6430e-03, -6.2582e-03, -1.9262e-02,
        -1.7176e-03, -9.7000e-04, -2.2565e-03, -1.2576e-03,  4.5512e-03,
        -1.0977e-02,  8.4039e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7979e-01,  2.8595e+01,  2.8145e-01,  1.9020e-01,  1.6279e-01,
         1.4779e-02,  2.3175e-02,  3.5893e-02, -1.0283e-01, -8.4318e-03,
         1.3536e-01, -2.5768e-02, -1.1387e-01,  5.0128e-02, -2.5754e-02,
        -3.8861e-02, -1.8092e-02,  7.6915e-03, -1.1379e-01,  4.1278e-02,
        -1.9250e-02,  7.0111e-02, -5.1017e-02, -7.4923e-02,  2.0991e-02,
         2.8993e-02,  9.7952e-02,  5.1761e-02,  1.2686e-02, -2.2581e-02,
        -8.2964e-03,  9.1282e-03, -1.0755e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5714e-01, -6.4738e+01,  3.9813e-02,  2.5892e-02, -1.1364e-01,
         1.7646e-02, -1.4084e-01, -3.5210e-02, -1.6819e-01, -1.2832e-01,
        -9.4389e-02,  1.1627e-01,  6.7585e-02,  3.9870e-01,  1.6069e-02,
         5.3992e-03, -1.6531e-01,  3.2370e-02, -1.0401e-02, -4.5657e-03,
         1.4111e-02, -2.8557e-02, -2.4231e-02, -1.8676e-01,  5.3498e-02,
        -9.7916e-02, -5.3333e-02, -4.4146e-02,  8.1812e-03,  8.3169e-02,
        -6.2477e-02,  1.6695e-02, -7.8240e-02,  5.0078e-02, -1.2633e-01,
        -1.6486e-01, -5.0737e-02, -3.6008e-02, -4.5934e-02, -5.2419e-02,
        -6.9457e-02,  5.4107e-02, -1.1894e-02, -1.0423e-01, -9.7459e-03,
        -8.9123e-02,  1.1058e-02, -6.4636e-02, -6.6301e-02,  5.9674e-02,
         8.5842e-03, -1.3625e-01, -5.8674e-02,  3.6780e-02, -6.2125e-02,
        -4.3802e-02, -2.8098e-02,  1.6857e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0410e-01, -1.1138e+01,  2.5384e-01,  9.4285e-03,  1.4370e-02,
         6.6633e-02,  2.8900e-02, -1.9641e-04,  2.2903e-02,  2.7982e-02,
         1.6579e-02, -2.8553e-03, -9.3136e-03, -4.4872e-02, -5.4704e-02,
         5.3823e-03, -1.5915e-03, -1.1023e-04,  2.4945e-02,  1.2503e-02,
         2.5783e-02,  1.9565e-02,  2.4952e-02,  4.1087e-02,  3.3296e-02,
         6.5486e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6170e-02,  4.3091e+00, -3.9367e-02,  3.9549e-02, -3.3100e-02,
         9.6432e-03,  1.2664e-02,  7.8387e-03,  1.8050e-02, -3.5537e-03,
         2.0478e-02,  1.0847e-02,  3.1140e-02, -2.2075e-03,  1.1832e-02,
         1.1287e-03,  6.1792e-03,  8.7668e-03,  3.1940e-02, -2.6131e-03,
         3.2592e-03, -1.8697e-02, -1.5230e-02,  1.8031e-02, -1.5994e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9127e-01,  4.7957e+01,  6.5338e-02,  2.6661e-01,  2.8775e-02,
        -5.6641e-02, -1.0073e-02, -1.2446e-01,  8.1082e-02,  9.2905e-02,
         2.1302e-01, -6.4473e-03, -1.7091e-01,  4.4672e-03, -6.0234e-02,
        -3.4720e-02, -1.0386e-01, -3.8570e-03, -8.7147e-03,  5.6476e-02,
        -2.1071e-02,  1.8003e-02,  5.8571e-02, -1.1127e-01,  8.8700e-03,
         1.5522e-01,  2.3721e-02,  8.6657e-02, -5.8840e-02, -3.3108e-02,
         4.0751e-02, -1.0988e-01, -1.0548e-01, -4.6206e-02, -6.0012e-02,
        -1.9732e-01, -5.2847e-02, -6.9981e-02, -5.8384e-02,  4.9638e-02,
        -4.7865e-02, -5.5222e-02,  1.7248e-01,  4.5162e-02, -7.8147e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8081e-01,  1.1175e+01, -5.7830e-02,  8.3052e-02, -1.2407e-02,
        -2.1734e-02, -4.8709e-02,  4.3789e-02, -1.4223e-02, -5.2839e-02,
        -5.0124e-02,  2.9201e-02, -1.0279e-02, -2.2677e-02, -2.8298e-02,
         1.6322e-02, -3.2366e-02, -1.5953e-02, -7.1638e-04,  4.4434e-02,
        -3.0416e-03,  5.1198e-02, -6.5943e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7897e-01,  1.8391e+01, -3.4709e-01, -2.9306e-03, -1.2571e-01,
        -2.8223e-02,  2.2570e-02,  1.2406e-01,  1.8457e-01,  1.1856e-03,
         1.4440e-01,  4.4105e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 4.1977e-03,  1.6080e+00,  2.3119e-02, -1.3677e-02, -1.4102e-02,
        -7.1628e-03, -1.1994e-02, -2.5408e-03, -9.3004e-04, -8.2917e-03,
        -5.6073e-03, -3.5166e-04, -9.4848e-03,  8.0679e-03, -1.0293e-02,
        -3.0172e-03, -1.5671e-03, -3.2613e-03,  2.6947e-03, -6.7866e-03,
         1.5555e-04,  6.2427e-04,  3.8963e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2917e-01,  1.6457e+01,  9.8674e-02, -1.1258e-02,  1.9782e-02,
        -8.8559e-03,  1.4192e-02,  3.0361e-02, -1.0442e-02,  1.6422e-02,
         2.8142e-02, -1.0833e-02,  4.0793e-02, -5.2798e-03,  1.0751e-02,
         2.8490e-02,  1.0866e-02,  1.6192e-02, -8.9712e-03,  1.0960e-02,
         7.6782e-02, -1.6342e-02, -9.6690e-04,  5.9736e-03,  2.8132e-02,
         3.1890e-03, -3.8325e-02, -9.4554e-03, -2.3600e-02,  1.4807e-02,
         1.3856e-02, -3.1277e-02,  7.3959e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6352e-01,  3.5013e+01, -2.5907e-01, -2.0681e-02,  3.6702e-01,
        -7.0911e-02, -1.6902e-01,  8.5817e-02,  6.5208e-02,  9.8712e-03,
        -5.5473e-02, -6.8549e-02,  2.7628e-02,  1.0373e-01, -8.9538e-02,
        -1.0972e-01,  2.9834e-02, -3.2294e-02,  1.0455e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0202e-02,  1.9799e+00, -5.9214e-03, -1.2221e-02, -8.7976e-03,
         1.6705e-03,  2.7018e-03,  4.5032e-04,  4.8557e-03, -1.1051e-02,
        -1.0886e-02,  1.8705e-02, -9.1662e-03,  1.5353e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5710e-02,  1.6637e+01, -7.2324e-02, -4.3618e-02,  1.4787e-02,
         1.1405e-01,  5.8033e-02, -1.3759e-02, -8.2256e-03, -8.2792e-03,
        -5.5075e-02, -8.9002e-03, -1.2400e-01,  8.0991e-02,  3.3370e-03,
        -3.8109e-02,  2.0456e-01, -8.0531e-03, -2.6760e-02, -1.5968e-02,
         2.3301e-01,  3.5304e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0773e-02,  2.4198e+00, -4.4742e-03, -1.7563e-02, -4.3188e-03,
        -5.2020e-03, -9.9815e-03, -2.3053e-02, -1.2547e-02,  1.0579e-03,
        -4.1434e-03,  2.6365e-03, -4.6378e-03, -5.9787e-04, -8.1657e-03,
         7.8170e-03,  3.7095e-03,  4.4265e-03,  4.4629e-03,  5.5262e-05,
        -3.2547e-03,  3.7111e-03, -8.4549e-04, -1.6313e-03, -3.3653e-03,
        -2.7176e-03,  4.1016e-03, -6.1305e-05,  4.8190e-03,  1.0125e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8915e-02,  8.3641e+00,  3.5930e-02, -1.4844e-02, -4.2975e-03,
         4.2112e-02, -3.5753e-02, -1.2728e-01,  5.8575e-02,  5.1135e-02,
        -1.0810e-01,  1.3361e-02,  6.6943e-03,  1.7685e-02,  1.8103e-02,
        -3.5225e-02, -1.1380e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9988e-02,  8.1432e+00, -5.3934e-03,  1.1887e-01, -3.2394e-02,
        -1.3150e-02, -5.5115e-02,  1.0709e-01, -1.2618e-02, -1.6176e-01,
         7.8562e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3757e-01,  3.5341e+01,  2.0551e-01,  4.7922e-02, -8.3929e-03,
         3.7550e-01,  2.2875e-02, -2.5493e-02, -2.0918e-01,  9.5237e-02,
        -5.7945e-02,  9.9446e-02,  3.8912e-02,  4.4020e-01, -2.5859e-01,
         4.3758e-02,  9.6049e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8359e-03,  2.0541e+00, -1.7081e-02,  7.3979e-03, -6.0830e-03,
        -2.9529e-04,  3.0235e-03, -2.1050e-03, -2.5352e-03, -1.6654e-03,
        -7.4441e-03,  5.4567e-03, -6.6913e-03, -1.1070e-02, -6.8410e-03,
        -8.0628e-04,  2.5704e-03, -3.9656e-03, -1.7101e-03,  3.6615e-03,
        -1.4089e-03, -3.8236e-03,  1.5654e-03, -1.6631e-03,  7.4584e-03,
        -1.0244e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6779e+00,  5.3681e+01,  8.9783e-01,  9.8159e-02,  8.3226e-03,
         1.9562e-01,  1.7116e-01,  3.7695e-02, -2.7668e-01,  1.4582e-01,
        -1.4191e-01, -1.7952e-01,  2.3481e-02,  8.2568e-02, -2.5617e-01,
        -1.5771e-01, -2.4386e-02,  8.2096e-03, -1.4301e-01, -1.3472e-01,
        -3.2207e-01,  3.6818e-04, -6.6876e-02,  4.2691e-02, -1.1653e-01,
        -7.1709e-02,  2.8306e-01, -9.1426e-02, -1.3065e-01, -1.6268e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0645e-02,  5.5310e+00, -1.4354e-02,  8.0114e-03,  4.9903e-02,
         6.9190e-03,  2.1660e-02, -1.1302e-02,  7.7461e-03, -2.5467e-02,
         2.1151e-03,  1.7279e-02, -4.1267e-02,  6.2545e-03, -1.2893e-05,
         3.8385e-03, -1.3145e-03,  4.9191e-03, -1.1303e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 9.5316e-02,  6.0436e+00, -4.6131e-03,  8.7504e-02,  1.9060e-02,
        -7.2200e-03,  7.6533e-02, -2.7837e-03, -2.1912e-02,  1.4876e-02,
        -1.7955e-03,  6.2187e-03, -4.6592e-03, -3.3358e-03,  1.7744e-02,
        -2.1775e-02,  9.4142e-03,  1.5089e-02, -4.6564e-04,  6.7624e-03,
         9.5397e-03, -2.0645e-04,  6.5577e-03, -3.7938e-02, -9.8350e-03,
         8.0941e-03,  2.8116e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3146e-03,  6.1846e-01,  2.7418e-03, -1.1669e-03,  2.0933e-03,
        -1.3844e-04,  6.9270e-03,  9.2450e-04,  4.7178e-03, -1.4545e-03,
        -1.5701e-03,  2.4553e-03, -3.6412e-04, -3.8398e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4002e-03,  3.9252e+00,  1.7268e-02, -7.1353e-03,  5.7860e-03,
        -2.6320e-02, -8.4582e-03,  2.0705e-03,  5.7294e-03,  6.9987e-03,
        -6.2170e-03, -2.5543e-03, -2.6936e-02,  6.0637e-03,  5.7918e-05,
         6.2258e-04,  1.8913e-03, -1.0856e-03,  2.5609e-02,  1.9488e-02,
        -9.9156e-03, -3.4116e-03,  2.7818e-03, -7.2136e-04, -2.6130e-02,
         9.8555e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4521e-03,  1.8216e+00, -1.0334e-03, -1.4780e-02, -1.4324e-03,
        -1.5568e-02,  5.3556e-03, -5.4782e-03, -1.2112e-02,  2.4791e-03,
         6.3370e-03,  1.2430e-02,  2.0416e-03, -4.9003e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3437e-02,  4.0644e+01, -5.5900e-02,  4.9054e-03, -1.2149e-01,
         4.7920e-02, -1.0613e-01, -3.2895e-02,  3.4821e-02,  1.2505e-03,
         2.3382e-02, -5.1209e-02,  1.5845e-01, -1.4021e-01,  1.5591e-01,
         3.2459e-02, -2.3292e-02,  3.3323e-02,  7.7003e-03,  2.4103e-01,
        -5.1074e-02,  6.6694e-02,  2.4592e-02,  2.0950e-02,  2.5703e-02,
         1.1698e-01,  1.9203e-01,  9.6106e-02,  8.5934e-02, -1.7081e-02,
        -3.0314e-01,  8.7131e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8457e-02,  8.2931e+00, -5.4891e-02, -6.7127e-02, -2.2279e-03,
         1.2314e-02, -1.6840e-02, -9.6500e-04,  2.6315e-02, -3.4390e-03,
        -2.8337e-02, -5.3326e-03,  2.8671e-02, -1.8658e-03,  7.8053e-03,
         4.0543e-03,  4.7084e-03, -2.6135e-03, -6.3710e-02,  1.6326e-02,
         1.6580e-02,  9.3615e-03,  6.4117e-03,  1.4689e-02, -8.3515e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0244,  0.6474, -0.0024, -0.0023,  0.0054, -0.0056,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8638e-02,  9.9394e-01,  9.0225e-03, -7.0685e-03,  4.3229e-03,
        -4.7403e-03,  1.2738e-03,  1.2273e-03,  7.9911e-04, -7.9817e-04,
        -3.8107e-03, -1.3470e-03, -4.8662e-08, -4.6935e-03,  2.4594e-04,
         1.7093e-05, -8.6822e-04,  1.4993e-03, -6.8245e-04,  3.1827e-03,
        -8.6173e-04, -1.7738e-03, -3.8420e-04,  4.1430e-04,  1.2710e-03,
        -6.1604e-04, -6.8331e-05, -5.4161e-04, -3.0572e-04,  4.5592e-03,
         1.8137e-04, -9.5218e-04, -3.9150e-03,  5.1587e-04,  1.8275e-03,
        -8.6812e-04,  1.2862e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2212e-01, -8.1324e+01, -3.3849e-02, -2.2128e+00, -1.3302e-02,
        -8.0922e-02,  2.6101e-01, -2.6495e-01,  1.9503e-01, -2.2185e-02,
        -1.5204e-01, -1.0868e-01, -3.5217e-01,  9.2767e-02, -8.0124e-02,
         8.1159e-02,  6.3773e-02, -4.7670e-02,  1.3531e-01, -1.5336e-01,
         7.3493e-04, -1.1703e-02, -3.0802e-01, -4.7357e-02,  7.1400e-02,
         4.7462e-01,  2.9479e-01,  1.3562e-01, -5.3112e-02, -5.7116e-03,
         4.5862e-03,  1.0932e-01,  2.2332e-01, -1.4877e-01, -3.3004e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2074e-01,  1.6545e+01, -1.0255e+00, -2.6332e-02,  1.3702e-02,
         7.0929e-02, -8.8376e-02, -5.4366e-02, -8.3459e-02, -1.0439e-01,
         4.5155e-02, -4.3273e-01, -8.9126e-02, -5.2973e-02, -1.0287e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6957e+00,  1.6718e+02, -2.1846e+00, -2.1269e-01,  9.8202e-02,
        -4.2828e-01,  1.8232e-01, -1.5316e-01, -7.0727e-01,  4.7153e-02,
         1.0494e-01, -1.8117e-01, -2.8379e-01, -2.5372e-01, -6.3508e-03,
        -3.5185e-02, -1.0104e-01,  2.3698e-01,  2.2144e-01,  4.0697e-02,
         2.2729e-01,  3.5519e-01, -8.0545e-02,  3.4606e-01,  7.3441e-01,
        -2.7005e-01,  1.6901e-01, -7.7760e-02,  1.4328e-02,  9.3420e-02,
        -1.3325e-01, -1.2202e-01,  3.2547e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2820e+00, -5.3977e+01,  1.0055e+00,  7.3214e-02, -8.3561e-02,
         4.3283e-01, -7.6346e-02,  3.0525e-02, -9.3385e-02, -1.6701e-01,
         2.9159e-01,  3.1491e-01,  1.9559e-01,  1.6000e-01, -7.3034e-03,
         3.2250e-02,  4.9857e-03,  1.2381e-01, -2.0972e-02,  1.8497e-02,
         5.9410e-03, -2.5623e-02,  1.5946e-01, -1.2195e-01, -7.5814e-02,
        -3.6969e-01, -9.1757e-02, -1.9624e-01,  1.2097e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.1488e-02,  5.3004e+00,  7.2023e-02, -1.1467e-02, -4.6834e-02,
        -1.5228e-03, -1.3441e-02,  3.9997e-04,  7.3802e-04, -5.7557e-02,
         1.4012e-02,  3.0476e-03, -5.6196e-03, -1.8992e-03, -4.9527e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0630e+00, -1.9627e+02, -1.3124e+00,  8.1004e-01,  5.3005e-01,
        -6.9784e-01,  2.6439e-01,  2.3850e-01, -5.8032e-01,  2.0113e-01,
         4.3087e-01,  1.8182e-02,  1.5239e-01, -7.5144e-03, -1.5852e-01,
         1.9536e-01, -6.0829e-02,  1.9295e-03,  9.9578e-02, -3.3653e-02,
         3.2674e-01,  5.3386e-01,  2.8284e-01, -3.2848e-02,  1.0260e-03,
        -1.0685e-01, -4.7506e-02,  2.5475e-02,  8.0754e-01,  3.7315e-02,
        -1.1176e-01, -1.4392e-02,  3.6817e-01, -1.5291e-01,  2.5743e-01,
        -3.9424e-02,  3.0933e-02, -6.8100e-02, -1.0362e-01,  2.2680e-01,
        -6.6439e-02, -2.8530e-01, -6.9293e-03, -2.7334e-02,  1.3987e-01,
         1.7132e-01,  1.3674e-01, -4.2782e-01,  1.5318e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5491e-01, -8.4042e+01,  1.4316e+00,  2.1904e-02, -4.0855e-02,
        -2.7380e-01, -1.8402e-01,  3.6216e-01,  3.6419e-01,  1.4193e-01,
        -1.3274e-01, -4.5025e-02, -3.4864e-02, -1.2662e-01,  8.5123e-02,
         3.1036e-01, -1.3324e+00,  1.4993e-01, -2.4175e-01, -2.7481e-01,
         2.2858e-01,  1.7738e-02,  6.5241e-02, -1.0314e-02,  9.1421e-02,
         2.0812e-02,  3.8008e-01,  1.1145e-01, -5.5084e-02,  2.4464e-01,
        -4.8915e-02, -2.2100e-01,  8.2345e-02, -2.5814e-01, -1.7347e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5496e-03,  2.5051e+01, -1.7296e-01,  3.9222e-02,  8.5146e-02,
         1.4138e-01, -3.2335e-01,  5.4257e-02,  6.0174e-02, -7.2201e-02,
        -3.2088e-02, -3.9012e-02, -4.0689e-02, -7.8700e-04,  7.9474e-02,
         3.4054e-02,  1.0101e-01, -8.7318e-03, -1.6120e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9336e-01,  1.2879e+01, -4.2576e-02, -2.1993e-01,  3.0068e-02,
        -1.2806e-01, -4.5145e-02,  4.1116e-03, -5.7875e-02, -8.1557e-02,
         2.2074e-02, -6.0198e-02, -1.6723e-01,  7.9736e-03, -7.0342e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5446e-01,  4.4758e+00,  1.4881e-02,  1.2775e-02,  1.6283e-03,
        -1.4100e-03, -1.0736e-02, -1.0051e-03,  8.4795e-03, -5.2230e-03,
        -6.3213e-03,  2.3694e-03,  1.4842e-04,  1.5238e-02,  7.2291e-03,
        -4.1837e-03,  5.5745e-05, -7.4935e-03, -1.0685e-02,  3.0995e-02,
         3.0473e-02,  7.1638e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7380e-03,  1.0050e+00, -1.5950e-02, -3.2196e-03, -1.1673e-04,
        -3.4917e-04, -6.0129e-03, -1.2570e-03, -1.1722e-02, -6.6425e-04,
         8.0573e-04, -4.1222e-03, -3.3930e-03, -3.2167e-03, -1.8565e-03,
        -1.1408e-03, -2.2064e-03, -1.1307e-02, -4.5869e-03, -3.9984e-03,
        -1.4451e-03, -1.1213e-04, -3.8582e-03, -4.9404e-03,  2.1946e-03,
        -3.4473e-04, -2.5351e-03,  3.1877e-03, -3.7536e-03,  6.8819e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0566,  0.8321,  0.0034,  0.0080,  0.0122,  0.0046,  0.0016,  0.0014,
         0.0039, -0.0018, -0.0111,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8250e-03,  6.0150e-01,  7.2887e-03,  2.9092e-04, -1.8334e-03,
         1.2848e-03,  2.8756e-03, -1.7476e-03,  1.9132e-03,  9.5186e-04,
        -1.0307e-03,  1.2672e-03, -9.8333e-04, -1.0607e-03, -1.3572e-03,
         2.0306e-03,  8.5856e-04,  2.3576e-04, -4.4584e-04,  2.1009e-05,
         1.3501e-03, -5.5788e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5481e-01,  2.4893e+01,  7.5604e-02,  1.3020e-01,  4.2411e-02,
        -3.2852e-01, -3.7190e-02,  1.5432e-01, -3.8470e-02, -1.4976e-03,
        -1.6233e-01,  5.7379e-03,  7.3757e-02,  1.6961e-03, -9.2996e-02,
        -8.0133e-03,  3.4140e-02, -1.4916e-02, -1.2120e-01,  7.8372e-03,
         5.4868e-03, -1.6932e-02, -1.4421e-02, -2.9217e-02, -1.3215e-02,
         5.2922e-03, -6.6990e-02, -3.4617e-03, -1.6776e-01,  9.2543e-04,
         6.3635e-02,  3.5279e-02, -7.7804e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5598e+00, -1.2950e+02, -6.4513e+00, -1.2541e+00, -3.9675e-01,
        -3.6734e-01, -1.3879e-01,  8.5102e-01,  7.7480e-01,  3.2475e-01,
        -2.2380e-01, -3.5553e-01, -2.0877e-01, -7.4168e-02,  4.3779e-01,
        -1.3886e+00, -1.7185e-01, -1.3709e-01, -1.8179e-01, -1.9089e-01,
        -9.0741e-01, -3.2967e-01, -1.3181e-01, -3.8277e-01, -2.6714e-01,
        -9.1066e-01,  3.3984e-02,  6.6280e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2792e+00,  8.8003e+01, -1.4264e+00, -8.7878e-01, -1.1992e-01,
         3.2072e-02, -3.7274e-01,  1.5949e-01, -6.5321e-01, -1.0038e-01,
        -4.8593e-01, -4.3424e-02, -4.0292e-01,  1.1304e-01, -4.4328e-02,
        -1.0041e-02, -1.8681e-01, -2.0338e-01, -4.9790e-02, -3.3763e-01,
        -9.3817e-02, -1.9015e-01,  4.6158e-01, -3.1904e-01,  1.3535e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-3.2572e-02,  9.4111e+01, -9.9463e-01,  3.1304e-01, -5.4229e-01,
         2.2093e-01, -8.3304e-02,  1.7170e-01, -3.3661e-02, -1.3476e-01,
         2.1141e-01,  6.0509e-01, -1.3868e-01, -2.1425e-01, -1.3864e-01,
        -2.4499e-01, -1.4065e-01,  1.9338e-01, -6.8990e-02, -3.7286e-02,
         1.0688e-02, -5.4551e-02, -2.1124e-01, -1.0706e-01, -1.4117e-01,
         1.7070e-01,  4.5660e-02, -2.3737e-01,  3.9327e-02,  5.7155e-03,
        -3.2360e-01, -8.2459e-02, -4.7448e-02, -8.0733e-02, -7.8643e-02,
         1.7666e-01, -1.2205e-01, -8.6512e-02,  9.7274e-03,  4.9342e-02,
         7.1301e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0929e-02,  3.9349e+00,  4.2819e-02, -4.7451e-02, -9.7651e-04,
         5.6454e-04, -1.1724e-02, -1.2360e-03,  1.0479e-03,  5.3025e-03,
        -8.5535e-03, -1.7032e-02,  1.0327e-03, -1.1877e-03,  4.9492e-03,
        -1.4714e-02,  6.5371e-03, -3.0454e-03, -6.6643e-03,  1.9821e-02,
        -6.7437e-03,  3.6811e-03, -1.0397e-03,  9.5757e-04,  1.8588e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7842e-01,  4.1041e+01,  3.3770e-01, -3.9962e-01,  1.1881e-01,
         2.1127e-02,  5.8602e-02,  9.3028e-02, -1.9339e-01,  2.0352e-01,
         1.7611e-04,  1.2940e-01,  2.5583e-01,  6.1567e-02, -1.0254e-01,
         3.1252e-02, -5.0982e-02,  6.0079e-02, -2.4233e-02, -1.3917e-02,
        -1.4821e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6856e-02,  6.1851e+00,  2.3431e-02, -1.6241e-02,  1.7556e-02,
         3.0850e-03, -1.6507e-02, -1.5127e-02, -1.3654e-02,  1.5581e-02,
        -3.9679e-02, -6.9745e-02, -1.7155e-02, -3.7306e-02, -8.2068e-03,
        -6.7516e-02, -2.8076e-02,  2.5610e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1061e-02,  2.9159e+01, -2.9014e-01,  1.0071e-01,  3.0918e-02,
         1.1686e-01,  2.8787e-02, -1.4817e-01,  1.7418e-02,  2.9987e-02,
        -3.9627e-02, -7.3658e-03,  1.1147e-02, -3.8490e-03,  6.6111e-02,
         5.4776e-02,  4.5295e-02, -6.9836e-03, -2.7131e-02,  3.5263e-02,
        -3.2626e-02, -2.3778e-02, -2.4136e-02, -1.0605e-03,  4.2049e-02,
         2.6551e-02,  3.2385e-02,  3.1830e-02, -6.1234e-02,  3.3921e-02,
         8.6927e-02,  1.0867e-02, -4.5696e-02,  4.0529e-02,  1.5627e-01,
         4.8503e-02, -1.9053e-02,  3.8314e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5506e-02,  1.3917e+01,  1.4409e-02,  9.7418e-03,  2.8526e-02,
        -2.8550e-02, -3.0881e-02, -7.7385e-03,  3.2442e-02,  1.9270e-02,
         5.7079e-03, -2.7343e-02, -2.2357e-02,  2.6949e-02,  1.7995e-02,
         2.6366e-02,  1.1758e-02, -6.0988e-03,  1.9034e-02, -1.2633e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0554,  9.1498, -0.1147, -0.1026, -0.0214, -0.0596,  0.0368,  0.0468,
         0.0148,  0.0336,  0.1275, -0.0174, -0.0508,  0.0919,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2808e-02,  8.5282e+00,  4.2372e-02, -1.7090e-02, -5.2244e-03,
        -2.2696e-02, -1.2022e-02,  6.5775e-03, -4.9882e-03,  5.6683e-03,
        -8.2541e-02, -5.6241e-03,  5.6730e-03,  4.8727e-03,  7.4601e-03,
        -8.4825e-03, -4.0599e-03, -1.6853e-02,  4.9268e-03, -1.3213e-03,
        -1.3239e-02,  1.2710e-02,  5.6854e-03, -2.9357e-02,  6.6186e-03,
        -9.7201e-03,  1.3864e-02, -5.2776e-02, -1.5277e-03, -4.4963e-02,
         2.1735e-03,  7.9038e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3962e+00,  1.9186e+02, -8.4958e-01, -1.8891e-01, -7.1674e-01,
        -5.7750e-01, -6.5909e-01, -3.4142e-01, -2.6288e-02, -5.0936e-01,
        -3.3977e-02, -7.4568e-01, -5.4393e-01,  3.3130e-02,  3.2978e-01,
        -2.1440e-01, -2.9969e-01, -1.6597e+00,  3.8795e-02, -1.0998e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1581e-03,  6.9132e+00, -5.8544e-02,  2.2511e-02,  2.7470e-02,
         2.0884e-01,  3.1510e-02,  3.6886e-02,  4.1599e-03,  1.1868e-02,
         1.3343e-02,  8.5753e-04, -8.5865e-03, -6.9981e-04, -5.0497e-03,
        -2.5232e-02, -6.5660e-03,  3.8377e-03, -1.2979e-03, -1.5139e-02,
         3.8606e-03,  1.5946e-02, -2.5629e-02, -2.2981e-03, -1.2311e-02,
        -1.9718e-02, -2.0858e-02, -2.0500e-02, -9.2441e-03,  1.1823e-02,
        -1.6142e-02,  1.5368e-02, -2.5629e-02,  3.5505e-02,  1.1180e-02,
         1.8934e-04, -1.2741e-02,  1.2105e-02, -1.1423e-02, -8.3976e-03,
         3.0932e-03, -5.0251e-03, -1.1980e-03, -3.4645e-02,  7.0814e-04,
        -1.0744e-02, -2.5200e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3571e-02,  2.1543e+00,  2.2405e-03, -3.0630e-03,  7.8358e-03,
        -1.4421e-03, -6.5465e-03, -3.8920e-03,  5.5571e-03, -1.0953e-02,
        -1.2027e-02, -6.5175e-03,  2.6957e-03, -3.5573e-03,  1.3723e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3448e-04,  2.3318e-01,  2.4463e-03,  9.5850e-04, -2.6526e-03,
        -7.8584e-04,  5.1201e-05,  3.5480e-04,  2.6047e-04,  3.0595e-04,
        -2.1755e-04, -2.2103e-04, -7.5094e-05,  1.0331e-03, -2.3549e-04,
        -3.3113e-04, -1.4559e-04, -1.6716e-04,  1.9148e-04, -3.6732e-04,
        -9.9923e-05, -2.0669e-04, -1.7487e-04,  8.8430e-05, -1.2647e-04,
        -6.0717e-04,  6.4459e-04,  2.8479e-03, -2.1902e-04,  1.3508e-04,
        -6.2973e-04, -2.2326e-04, -8.2315e-06, -1.6593e-04, -3.5186e-04,
         2.3180e-04, -2.1831e-04, -2.2924e-04, -8.7019e-04, -5.9305e-04,
        -1.6955e-04,  1.1040e-04, -1.4968e-05, -1.6377e-04, -8.8213e-05,
        -9.7751e-04,  6.0897e-04, -3.1502e-04], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-8.0414e-02,  6.3230e+00, -2.5610e-02, -4.6199e-03,  1.9167e-02,
        -7.6970e-02,  2.5362e-02,  2.9310e-02,  4.9490e-04, -6.3790e-02,
        -9.1400e-03, -5.5042e-02,  1.3403e-02, -3.0105e-03, -3.2624e-02,
        -1.0861e-02, -5.3994e-02, -1.8037e-02,  9.8631e-03, -5.5516e-03,
         2.6249e-03,  1.2323e-02,  1.7735e-02,  2.1687e-02,  7.8664e-03,
        -8.1203e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2950e-01,  4.0670e+01,  4.5458e-01,  1.6485e-01,  3.5248e-02,
         4.3374e-02, -1.3662e-02,  1.3529e-01,  4.1789e-02,  2.7406e-02,
         2.5263e-01, -7.4407e-03,  5.7803e-02,  4.4923e-02, -1.6275e-01,
        -1.5809e-01,  2.8883e-01, -6.4016e-03,  2.0843e-01,  4.2907e-02,
         1.1568e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6940e-03,  2.7869e-01, -2.1904e-03, -3.1492e-03,  1.8601e-03,
        -1.3586e-03,  1.9969e-04, -4.0337e-04,  4.4064e-04,  2.6968e-05,
         4.3158e-07,  2.5684e-04,  2.5590e-04, -1.4929e-03, -8.7489e-04,
        -6.9890e-04,  5.4395e-04, -8.8967e-04,  1.5932e-03, -1.6749e-04,
        -5.3601e-04, -3.5116e-04, -2.7722e-03,  7.0248e-04,  1.1489e-03,
         3.1809e-04, -2.9994e-04,  7.2024e-04,  3.5036e-04, -3.6890e-05,
        -1.5259e-03,  1.5708e-04,  9.3964e-04,  5.3049e-06, -5.4524e-04,
        -3.9142e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2456e-03,  8.2011e-01, -9.5139e-03, -4.5955e-03, -3.9712e-03,
        -2.3674e-03, -1.4405e-03, -2.8082e-03,  7.2778e-03,  8.2695e-04,
        -4.9422e-05,  3.4019e-04, -8.5220e-04, -3.3771e-03, -1.4974e-03,
        -8.5662e-05, -1.0249e-03, -3.7872e-03,  6.6495e-03, -5.6393e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1670e-02,  3.6023e+00,  6.1887e-02, -8.8041e-04,  1.7926e-02,
        -3.7928e-04,  1.3602e-04, -2.0481e-02,  1.0088e-02,  3.8185e-03,
        -1.3867e-02, -1.3143e-02, -2.8192e-02, -3.3826e-03, -5.7234e-03,
        -9.4614e-04, -1.3894e-02, -1.0805e-02, -8.8524e-03, -3.3564e-03,
         7.8537e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9903e-02,  4.9071e+00, -3.6107e-03,  5.1407e-02, -1.8425e-03,
        -7.1178e-03, -3.1630e-02, -5.9463e-03,  2.6264e-02,  6.1510e-02,
         9.8222e-03,  6.5659e-02,  4.5341e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9754e-02,  3.0201e+00,  1.8001e-02, -4.0005e-03,  6.6105e-03,
         5.5061e-03,  1.2058e-02,  4.7928e-03, -9.4828e-03, -1.1052e-03,
        -2.3047e-02, -1.1617e-02,  4.2311e-03, -1.6182e-03, -1.9154e-03,
        -8.0811e-03,  3.7502e-03,  4.4332e-04, -8.6864e-03, -5.7500e-03,
         8.2637e-03,  7.3940e-05,  1.2070e-03, -1.6225e-03, -3.4953e-04,
        -3.0125e-03, -7.7433e-03,  1.6685e-03,  5.2103e-03, -4.8020e-03,
        -2.6531e-03,  6.2295e-03,  6.4645e-03, -1.3963e-03, -1.9154e-03,
         7.6893e-03,  1.3052e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7828e-02,  2.8131e+00, -2.2470e-02, -3.0763e-03,  8.5936e-03,
        -4.9614e-03,  1.3154e-03,  1.9548e-02,  5.3740e-03,  3.2899e-03,
        -1.1466e-03,  2.1353e-03,  2.7056e-03,  4.1184e-03, -1.9203e-02,
        -1.5546e-03,  7.4824e-03,  3.5972e-04, -4.1083e-05,  2.0062e-02,
         8.0640e-03, -2.1851e-03,  7.7162e-03,  8.3129e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4649e-01,  1.0444e+02,  4.4389e-01, -4.5844e-02,  1.7409e-01,
         4.4151e-02, -5.7454e-02, -4.9668e-01, -6.7598e-01,  7.2126e-01,
        -8.2782e-02, -4.4151e-01, -1.0392e+00,  2.0958e-01,  7.8246e-01,
        -1.6649e-01, -1.0886e-01,  1.4143e-01,  2.6867e-01,  1.6276e-01,
         1.2382e-01,  8.3194e-02,  2.1527e-01, -3.8399e-01, -1.5670e-01,
        -2.3527e-02, -2.1878e-01,  3.3960e-02, -3.4284e-01, -1.5974e-01,
        -3.0896e-01, -7.1960e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6940e-02,  2.0904e+01,  4.1267e-02, -1.1456e-01, -2.0778e-02,
        -1.9621e-02,  1.6549e-02, -3.9663e-02,  4.3730e-02, -1.9844e-02,
         9.6705e-03,  4.4463e-03, -3.7459e-02,  2.3097e-02, -2.9378e-02,
        -3.7845e-02,  3.6477e-02, -1.4192e-02,  8.9591e-03,  3.5326e-02,
         2.0367e-02,  5.9145e-02, -2.5552e-02, -4.9025e-02,  2.4237e-02,
         4.3961e-02,  1.0672e-01,  5.0967e-03,  4.0095e-02, -1.6533e-01,
         1.3511e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9840e+00,  1.3862e+02,  1.0629e+00,  2.3628e-01,  7.5979e-02,
        -1.1753e-01,  1.2263e+00, -1.7976e+00, -8.5394e-02, -3.2950e-01,
         1.3697e-01,  2.1757e-01,  2.7590e-01, -6.5572e-01, -1.2543e-01,
        -3.8737e-02, -9.1078e-02, -3.5897e-01,  2.5605e-01,  8.0161e-01,
        -3.2520e-01, -1.0000e+00, -1.8811e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7927e-02,  3.6691e+00,  1.8642e-02,  5.5843e-03, -1.2106e-02,
         2.4409e-03,  2.1673e-02,  6.6364e-03, -3.8033e-03,  1.9694e-03,
         1.2485e-02,  9.2129e-03, -6.6734e-03, -6.5996e-03,  4.8535e-03,
        -6.3619e-03, -5.8675e-03,  8.3633e-05, -1.4057e-03, -8.6737e-04,
        -1.1529e-03, -3.2570e-04,  2.4614e-02,  3.9428e-03,  1.0450e-02,
         9.3043e-03,  5.2099e-02,  6.7775e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-4.3066e-02,  4.9708e+01,  3.6468e-01, -1.4354e-01, -1.3151e-01,
        -1.8643e-01,  1.0823e-01, -8.9987e-02,  8.0705e-02, -6.4606e-02,
         8.6707e-02,  1.0547e-02, -6.9723e-02,  1.6491e-02,  1.0894e-02,
         7.8179e-03,  1.2118e-01,  2.0728e-01, -6.1851e-02,  2.0024e-01,
         4.9413e-02, -2.3942e-02, -1.7214e-01,  3.4011e-02,  6.3031e-02,
        -2.6017e-02,  1.0752e-01, -4.9698e-02, -6.0545e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8368e-02,  3.7511e+00, -8.5856e-03, -2.6041e-02,  2.0201e-02,
         2.0496e-03,  4.1003e-03, -7.2980e-03, -8.4789e-03,  5.0343e-03,
         1.0149e-02, -1.0284e-02, -3.0858e-03, -2.2679e-03, -2.3829e-03,
         1.3437e-04,  3.8135e-03,  2.2222e-03,  3.2872e-02, -4.7309e-04,
         4.8901e-03,  1.3207e-02, -3.6400e-03, -1.2364e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0675e-01,  5.9835e+01,  2.0073e-01,  7.8965e-02,  8.6576e-02,
         2.2942e-01, -1.0201e-01, -2.1968e-02,  1.0495e-01, -1.0390e-01,
        -3.1569e-02, -9.7998e-02,  2.2864e-01,  2.0221e-01,  1.5694e-01,
         1.7352e-01,  6.8024e-02, -5.7437e-02, -3.1292e-01,  3.1701e-03,
         2.4745e-01, -1.7278e-01, -2.1969e-02,  2.4673e-01,  1.4063e-01,
        -1.3081e-02, -4.4994e-02, -2.0826e-03, -1.0335e-01,  5.9635e-02,
         5.1126e-03,  5.1976e-02,  1.0414e-02, -8.6387e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8442e+00,  1.7480e+02,  8.3481e-01,  2.5359e-01, -4.0193e-01,
        -3.3064e-01,  8.5898e-02,  1.4950e-01,  3.4613e-01,  1.5158e-01,
         2.6880e-02, -4.5723e-01, -2.4921e-01,  3.9892e-02,  2.6170e-01,
        -5.7103e-01, -2.2677e-01,  3.3554e-01,  7.9567e-02,  3.7167e-01,
        -2.5237e-01, -2.0844e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0400e-01,  1.9817e+01,  4.6004e-02,  2.9902e-01, -1.5841e-02,
         2.7892e-02,  6.8590e-02,  9.7855e-02, -8.1371e-03, -2.6113e-02,
         1.0061e-01, -1.0045e-01, -9.2966e-02, -4.9230e-02, -8.5853e-02,
        -7.4526e-02, -3.6525e-01, -2.4327e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7083e-03,  2.6988e+00, -3.9652e-03,  1.2172e-02,  1.3864e-02,
        -2.9290e-05,  1.7745e-03, -4.3517e-03,  7.6110e-03,  5.2031e-03,
         6.9680e-05, -1.2009e-02, -1.3537e-03,  4.4676e-03, -1.5931e-03,
        -3.0106e-03, -6.0122e-04, -1.2627e-02, -2.2394e-03, -1.4948e-02,
        -3.0425e-03,  1.1793e-02, -8.1104e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6795e+00, -2.2548e+02, -2.6277e+00,  8.3029e-03, -7.9664e-01,
        -5.1514e-01,  1.3193e-02,  2.7923e-01, -9.4279e-01,  2.5891e-01,
        -1.3992e+00,  1.7080e-01,  1.1057e-01,  2.0653e-01,  5.3315e-01,
         7.7098e-01,  4.5432e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6773e-01,  2.8796e+01,  1.2781e+00,  2.6446e-02,  4.8522e-02,
         2.2164e-01,  9.6364e-03,  6.3365e-02,  1.4316e-01,  1.6739e-01,
         1.1323e-01, -1.6978e-02, -8.1099e-02, -6.9429e-02, -5.4770e-03,
        -2.0345e-02, -3.5581e-03, -3.1150e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7514e-02,  1.6818e+00, -1.6475e-02, -3.6103e-03, -4.2525e-03,
        -1.9174e-03, -3.5890e-03, -1.3689e-02,  4.5932e-03,  7.6361e-02,
         1.5093e-03, -2.2066e-03,  2.2572e-03,  8.0972e-04, -2.8820e-03,
        -8.1171e-04,  3.2972e-03,  2.4955e-03,  5.5723e-03, -2.6019e-03,
        -2.7830e-03, -4.9607e-03,  2.3566e-03,  3.3678e-03,  2.3837e-03,
        -5.9710e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1306e-01,  3.7778e+01, -3.2390e-01, -7.4171e-02,  1.6160e-02,
        -1.9061e-01,  1.5585e-02, -4.9876e-02,  7.4084e-02, -1.0981e-01,
        -1.3167e-01, -8.8328e-02, -8.5573e-02, -2.0621e-01, -2.4618e-01,
        -1.3058e-02, -3.4097e-02, -1.9102e-02,  1.7570e-02, -4.3631e-02,
         4.5939e-02,  3.8926e-02,  3.1113e-02, -2.2174e-03, -3.2960e-02,
         7.8023e-02,  3.7412e-03, -3.4153e-02,  5.3222e-02,  1.9192e-01,
        -7.5669e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9767e-02,  1.7300e+00,  1.2354e-03,  4.0842e-03, -1.2191e-02,
        -5.4465e-03, -1.1711e-02, -4.4539e-03, -1.0439e-02, -1.7438e-04,
        -7.1869e-03, -4.4692e-03, -8.1689e-04,  2.6391e-03, -1.9136e-03,
         8.1967e-04, -5.6489e-03, -7.1469e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7502e-01,  1.3021e+01, -5.4502e-01,  5.8227e-03,  7.4932e-02,
        -6.1970e-02,  1.1096e-02,  2.4705e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.6586e-03,  2.8604e+00, -2.4662e-02, -2.7369e-03, -2.3246e-02,
        -3.1216e-03, -5.7194e-03, -2.6975e-03, -1.2115e-02, -2.1573e-03,
         4.1605e-03,  6.6303e-04, -1.9680e-03, -3.8386e-03,  4.7231e-03,
        -1.1502e-03, -8.3710e-03,  9.3743e-04,  9.0777e-03, -3.3611e-03,
        -1.0659e-02, -5.9671e-03, -1.2291e-02, -2.6204e-04, -5.2542e-03,
        -1.8390e-03, -8.4783e-03, -1.2889e-03,  3.9772e-03,  2.6476e-03,
         4.9334e-03, -1.7232e-03, -7.4123e-03, -8.3097e-07,  1.6895e-03,
        -1.0789e-03,  1.9829e-03, -1.7410e-03,  3.5245e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1725e-01,  1.4109e+01,  4.6536e-02, -1.6758e-02, -2.1969e-02,
         1.3073e-02,  2.4475e-02, -1.7995e-02, -7.3950e-03,  3.6962e-02,
        -1.3473e-02,  8.6610e-03,  2.1298e-01, -1.3494e-02,  1.1041e-03,
        -3.1274e-02, -2.4369e-02, -3.2977e-02,  1.0881e-03,  2.3220e-02,
         1.0100e-01, -8.2835e-03,  3.7591e-02, -6.1499e-03, -1.0533e-02,
        -2.9585e-02, -3.9123e-02, -5.8865e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2204e-01,  9.6070e+00, -9.0790e-02,  1.2360e-01,  2.2479e-02,
         7.3125e-02,  5.7056e-03,  2.5112e-02,  5.0935e-02, -1.5800e-02,
         7.0692e-02,  6.2918e-02,  5.1898e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2069e+00,  1.9137e+02,  3.0845e-01, -1.2810e+00, -1.5811e+00,
        -4.9603e-01, -8.5589e-01, -4.3590e-01, -2.3200e-01, -1.7069e-01,
        -1.9127e-02, -5.7620e-01, -6.4319e-01, -4.9179e-01, -3.9931e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3653e+00, -1.0252e+02, -1.2941e+00,  2.2961e-01, -5.5397e-01,
        -1.2352e-01, -5.9211e-01, -2.0355e-01, -3.8728e-01,  1.5310e+00,
        -6.5085e-02, -1.7943e-01,  5.9603e-01, -3.7487e-01, -8.1813e-03,
        -1.9865e-01,  3.9519e-02, -2.2725e-01,  3.8271e-02, -5.3071e-02,
        -1.4580e-01,  9.3396e-02,  5.3537e-02,  2.4818e-02,  2.0698e-01,
         1.2582e-01, -7.0521e-02, -4.5403e-01, -4.8901e-02,  8.3361e-02,
         4.9395e-01, -8.7158e-02, -7.8456e-02, -1.0846e-02, -2.6480e-02,
        -5.3795e-02,  4.3885e-03, -7.1503e-02, -1.0595e-01,  8.2407e-02,
        -1.0647e-01, -1.8694e-02, -1.6806e-01,  6.2474e-02,  4.5847e-01,
        -7.6569e-02,  2.0936e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7141e-01,  2.2734e+01,  1.0209e-01,  1.5346e-01, -1.2024e-01,
        -6.1530e-02, -1.8674e-02, -8.9835e-03,  3.9033e-02, -1.9364e-02,
         2.5930e-02, -6.9005e-02, -2.3494e-03, -9.9830e-03,  2.3537e-02,
        -9.5489e-03,  2.5503e-02,  7.8147e-03,  8.2659e-03, -2.6717e-02,
         1.1913e-02, -3.7582e-02,  4.0115e-02,  3.1226e-02,  1.2583e-02,
         2.0018e-02,  8.4081e-02,  4.5794e-02, -1.4816e-02,  1.8116e-02,
        -3.3672e-03,  1.2008e-02,  2.0753e-02, -2.1068e-03,  2.9067e-02,
         8.6585e-06, -9.4350e-03, -1.7176e-02,  3.7563e-03, -3.6388e-03,
         1.6721e-02,  6.5024e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8695e+00, -1.4150e+02, -1.4214e+00, -4.5199e-01, -1.2150e-01,
        -3.6842e-01, -2.9030e+00,  2.9383e-01,  3.9036e-01, -5.6911e-01,
        -3.9039e-01, -5.8710e-01,  1.9107e-01, -2.4145e-01, -4.8589e-01,
        -2.1346e-01, -3.5686e-01, -7.1068e-01, -7.4016e-02, -1.1478e-01,
        -1.2065e-01,  2.3409e-01, -3.7089e-01,  5.4368e-01,  1.1517e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7491e-02,  3.5277e+01, -9.3373e-02, -1.7975e-02,  8.0026e-02,
        -4.3677e-02,  1.4220e-01, -5.3262e-02,  6.1863e-02,  4.3379e-02,
         6.3750e-02,  4.8678e-02,  9.1506e-02, -2.1234e-01,  5.2140e-02,
         8.2231e-02,  1.1256e-02,  1.1163e-01, -9.3638e-02, -8.3204e-02,
         4.8583e-02, -3.3935e-02, -1.6745e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2830e+00,  3.9959e+01,  1.2985e-01, -1.8753e-01, -3.1951e-01,
        -7.2801e-02, -5.2270e-03, -3.9802e-02, -2.1185e-01, -8.8291e-02,
         1.1869e-01, -1.3096e-01,  7.2005e-02,  2.0011e-02, -2.2140e-01,
        -5.1000e-02, -1.2031e-02, -5.6959e-02, -1.0577e-02, -5.3756e-02,
        -4.3646e-03, -3.6284e-02, -1.0756e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2804e-02,  4.6445e+00,  3.7827e-02, -2.6271e-02, -2.6359e-02,
        -5.7150e-03, -9.9854e-03,  1.6531e-02,  9.3520e-04, -8.9064e-04,
        -4.6036e-02,  1.2540e-02, -2.6979e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9846e-01,  1.2263e+02, -1.2930e+00,  1.3960e-01,  1.9924e-01,
        -3.0558e-01, -1.5363e-01,  3.9301e-01,  3.3477e-02,  4.8087e-01,
         2.8830e-01,  2.1459e-01, -6.5206e-01,  3.3603e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6505e+00, -1.1222e+02, -1.6990e-01,  1.4470e-01,  9.1714e-02,
         1.0896e-01, -5.4930e-02,  3.8405e-01,  8.9840e-02,  7.9749e-02,
         5.3963e-01, -1.3999e-01,  2.4168e-01, -2.8046e-01,  3.6905e-02,
        -1.7776e-01,  4.3312e-01, -1.8200e-01, -9.4756e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.1377e+00,  3.7890e+01, -2.4880e-01, -1.5885e-01, -2.4114e-02,
        -1.8671e-02, -1.3577e-01, -6.2770e-03, -6.5893e-02, -9.3934e-03,
        -1.1191e-01,  7.6005e-02,  3.8004e-02,  4.0829e-01,  1.3295e-02,
        -1.3702e-01,  2.7928e-01,  9.9005e-02,  2.7231e-02,  1.9551e-01,
         4.2079e-02,  1.3443e-02, -6.4230e-02,  7.3192e-02, -2.4376e-01,
        -6.8153e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7921e-03,  6.7859e-01,  1.6644e-03, -4.5086e-03,  2.8483e-03,
         2.1093e-03, -2.8998e-03,  3.6326e-03,  2.4662e-03, -4.7630e-04,
         2.4656e-03, -1.8956e-03,  2.1254e-03,  1.1779e-03,  5.0049e-03,
         2.2205e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3085e-01,  1.7926e+01, -6.2387e-02,  8.5823e-02,  3.9465e-02,
        -1.7035e-02,  1.3353e-01, -3.1667e-02,  5.4140e-02,  2.0280e-02,
         9.6194e-02, -1.4136e-01,  1.8664e-02, -3.9811e-02,  1.2052e-01,
         2.0791e-02,  2.1670e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.6199, -64.0716,  -0.7795,  -0.0771,  -0.3266,   0.7689,  -0.0926,
         -0.2040,  -0.8031,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1068e-01,  2.4059e+01, -2.8383e-01,  2.6116e-01, -1.0282e-01,
        -8.7583e-03,  2.0549e-02, -1.2358e-01, -4.9019e-02,  1.5841e-02,
         7.4908e-02,  5.1532e-01, -4.3309e-03, -3.4596e-03,  3.7250e-02,
         5.5548e-02, -1.0737e-02,  4.9243e-02, -2.3635e-02,  7.2851e-03,
         5.1871e-02,  5.4614e-02, -3.9170e-02, -6.4466e-02,  3.5037e-02,
         6.5517e-02,  4.3980e-02,  3.3123e-04,  9.7911e-02, -4.7144e-03,
         3.5732e-02,  3.8818e-02,  7.6120e-03, -6.9951e-03,  4.6880e-02,
         7.3414e-02, -4.3979e-02, -2.8712e-02, -6.7392e-02, -1.9546e-02,
        -6.9098e-02,  3.0580e-01, -4.1358e-02, -2.3452e-02,  1.9829e-01,
         2.1056e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2559e-02,  6.2248e+00,  1.5016e-01, -4.1494e-02,  3.0223e-03,
         1.7360e-03,  5.5098e-03, -1.8573e-03, -1.9168e-02,  4.2244e-03,
        -1.1631e-02, -6.7869e-03,  3.8602e-02, -3.3401e-03,  2.6109e-03,
         2.7894e-03,  6.6799e-03,  2.9474e-02,  3.6655e-03,  3.4560e-03,
        -2.6766e-03, -2.8094e-02, -3.1645e-02,  1.4721e-02, -1.1791e-02,
        -2.0254e-02,  1.8108e-02, -9.1350e-03,  2.4466e-02, -1.8675e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ -3.5927, 194.9647,   0.8905,   3.9499,   0.4288,  -1.2186,   0.9764,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2298e+00, -1.4662e+02, -2.5694e-01,  1.5202e-01,  2.6562e-02,
         4.3294e-01, -5.3557e-01, -7.7001e-01, -5.5387e-01, -2.7006e-02,
        -2.6173e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1876e-01,  4.9868e+01,  1.0611e+00, -1.2898e+00, -2.1943e-01,
         1.4822e-01, -1.9681e-01, -2.9218e-01,  4.0107e-01, -3.1590e-01,
        -1.8378e-01,  3.3358e-01, -2.1695e-02, -1.1780e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2870e+00,  6.3003e+01, -9.8207e-01,  6.3690e-02, -1.0880e-01,
         5.1235e-04,  4.6729e-02, -3.0079e-02,  4.2822e-01, -2.7953e-02,
         4.4368e-02,  1.4912e-01, -1.4865e-01, -7.7555e-01, -8.1041e-02,
         6.2955e-02,  1.0230e-01,  1.1151e-02, -3.2300e-01, -1.1519e-01,
         6.0092e-01, -3.1287e-01, -5.4546e-02, -7.8466e-02,  1.8030e-02,
        -7.8007e-02, -6.8459e-02,  1.1376e-02, -7.9567e-02, -5.9035e-03,
         1.1210e-02,  1.7303e-01, -2.1470e-03,  4.5147e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6540e-02,  3.3763e+00,  5.5857e-02,  1.2760e-02,  9.9989e-03,
        -7.4580e-03, -1.9122e-02, -1.0781e-02, -3.9039e-03, -5.8511e-03,
        -1.1868e-02,  5.2366e-03, -9.4710e-03, -2.5713e-03, -9.0468e-03,
        -9.4566e-03, -3.4607e-02,  1.4276e-02,  1.2167e-03,  9.1403e-04,
         1.5952e-02,  3.1574e-04, -1.9080e-03,  4.6701e-03,  9.6255e-04,
         2.0382e-02, -2.9205e-02, -4.4040e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3824e-01,  9.3950e+00,  1.9435e-02, -2.4894e-02,  2.4759e-02,
        -6.6478e-02, -1.6967e-02,  2.6313e-02, -1.9113e-02, -8.1738e-03,
        -2.5429e-02, -8.5705e-03,  1.2397e-02,  7.3139e-02, -8.4861e-02,
        -1.9259e-02,  4.7466e-02,  3.2634e-02,  7.3652e-03,  6.5410e-03,
        -4.8221e-03,  1.4038e-02, -1.2136e-01, -4.1827e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.6266e+00,  5.6769e+01,  3.0425e-01,  3.6149e-01,  6.3449e-02,
         6.6374e-02, -3.4014e-01, -8.4909e-02,  4.6455e-02,  1.1451e-01,
        -3.1698e-01,  3.8259e-02,  4.5320e-01,  2.1076e-01,  3.3097e-02,
         6.2728e-02,  2.3373e-01,  2.2481e-02,  1.0730e-01,  4.9766e-01,
         9.8295e-02,  2.3295e-01, -1.8385e-01,  2.5419e-01,  2.3795e-02,
         1.6421e-01,  1.2301e-01, -6.0735e-02, -5.0239e-02, -3.0689e-02,
        -1.1506e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2326e-02,  5.6277e+00, -9.0726e-03,  1.3085e-03, -4.5783e-02,
        -2.5947e-02, -1.7398e-02, -3.2541e-02, -2.7577e-03, -3.9111e-03,
        -8.4858e-03, -1.6229e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2114e-02,  9.1027e+00,  6.3337e-04,  9.7641e-02,  1.9542e-02,
        -6.0590e-02,  1.8724e-03,  5.0995e-02,  1.4651e-02,  5.6086e-02,
        -4.0362e-02, -4.8490e-02,  2.4904e-02,  5.4088e-02,  1.0826e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8547e+00,  1.2443e+02,  3.3461e-01, -7.1007e-01, -7.0834e-01,
         2.1586e-01,  1.0517e-01,  1.8634e-01, -1.2263e-01, -3.4294e-02,
        -4.1076e-02, -2.3367e-01, -1.1614e-01,  1.0124e-01, -6.2805e-02,
        -5.4562e-02,  2.6033e-01,  2.6569e-01,  2.2730e-01, -7.8171e-02,
         7.1282e-02,  1.4559e-02, -3.7226e-01,  1.3753e-01,  2.6782e-01,
         1.4575e-01,  1.8329e-02,  5.3982e-02, -1.4579e-01, -1.2308e-01,
         5.0442e-02, -9.3595e-02, -4.4000e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6756e-02,  1.3844e+01, -2.4696e-01, -9.3692e-02,  4.4494e-02,
        -9.3854e-02,  1.4367e-02, -1.6863e-02, -1.5789e-01,  2.7032e-03,
        -1.2504e-02, -2.0033e-02, -5.5849e-03,  1.4449e-01, -2.8481e-02,
        -1.5042e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4781e-01,  3.7742e+01, -6.2123e-01,  5.0299e-01,  6.8829e-03,
        -4.8206e-02,  1.8326e-01, -8.1807e-02, -7.0791e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0297,  1.3643,  0.0176, -0.0067, -0.0057,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1206e+00,  8.3540e+01, -1.3800e+00,  2.4246e-01, -3.8389e-02,
        -4.0341e-01, -2.2557e-01, -1.5202e-01,  3.2185e-02,  2.2872e-02,
        -2.9304e-02, -1.2867e-01,  7.8166e-02, -1.8408e-01,  1.3901e-03,
        -2.6252e-02,  1.0510e-01, -6.6187e-02,  1.2877e-01,  2.1594e-01,
         2.3895e-01,  1.4841e-01,  6.5869e-02,  2.1242e-01, -2.4320e-02,
        -2.8138e-01,  3.1987e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  -0.3586, -101.4407,    0.1146,    0.2947,    0.9939,   -1.2856,
           1.6443,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8259e-01,  1.0953e+01,  3.6261e-02,  1.1491e-01,  1.8418e-02,
         4.2777e-02, -5.8931e-02,  8.8808e-03, -1.4133e-02,  5.4225e-02,
         8.4884e-03, -1.1424e-01, -3.5486e-03,  9.4542e-03, -1.2053e-02,
         3.7644e-02,  1.0698e-02, -1.6651e-03,  1.1685e-02, -1.4647e-02,
        -6.7120e-02,  6.4158e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9451e-02,  2.0630e+01,  1.3120e-01,  1.5295e-01,  8.2893e-02,
        -3.8023e-03, -1.9736e-02,  5.8220e-02, -1.9887e-02, -1.0969e-01,
        -2.0257e-03,  2.1273e-03,  1.1672e-02,  1.0930e-01, -3.0026e-02,
         4.8860e-02,  2.4212e-02,  3.1054e-03, -1.0595e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6846e-01, -7.5873e+01,  4.1153e-01, -2.3544e-02,  2.5921e-01,
         1.1959e-01,  2.9167e-01,  1.4626e-01, -1.9372e-01, -3.4492e-02,
         2.2417e-01,  7.4071e-02,  3.7397e-02,  3.1968e-02,  1.6307e-01,
        -4.5607e-02,  1.1420e-01,  2.8023e-01,  1.7991e-03,  2.9305e-01,
         5.3128e-02,  1.3028e-01, -5.1887e-02,  2.9266e-02, -4.0572e-02,
         4.3901e-02,  8.4109e-02,  1.0354e-01,  3.8779e-02,  5.5561e-01,
        -2.3319e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.6097e-01, -1.6438e+02, -3.6187e+00, -6.9166e-01,  1.0567e+00,
         3.7309e-01, -2.0352e-01, -2.7981e-01, -5.3762e-02, -9.6543e-02,
        -3.6654e-01,  8.1530e-02,  7.7717e-01, -3.3692e+00,  1.2144e-01,
        -6.1117e-02, -1.6096e+00, -8.6824e-02, -2.5838e-01,  5.2715e-02,
         3.0392e-02,  8.4047e-02, -1.2156e-01, -5.5178e-02,  4.0491e-01,
         3.7622e-03,  5.1705e-01,  1.4820e+00, -3.2991e-01, -4.4905e-01,
        -6.6473e-01, -6.0158e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5453e-02, -1.1722e+01,  8.0161e-02,  1.8104e-02, -4.9330e-02,
        -1.3745e-02, -2.3108e-03, -3.5580e-02, -1.7196e-01, -1.8476e-02,
        -2.0902e-02, -4.0983e-02, -8.2928e-03, -2.3674e-02, -4.6626e-02,
        -2.7741e-02, -3.4519e-02,  2.0997e-02,  3.8833e-02, -2.6378e-02,
        -2.8249e-02, -6.1272e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3523e-01, -1.5398e+02, -3.1509e-01, -1.0994e-01, -1.5096e-02,
        -4.4673e-01, -3.5698e-01, -1.3446e-01, -6.6755e-01, -1.5364e-01,
        -8.4019e-02,  5.8953e-02,  4.5504e-01,  1.7492e-01,  2.9586e-01,
        -1.3613e-01,  3.4463e-01, -1.6355e+00,  3.7943e-02, -5.8708e-02,
         9.6176e-02,  7.3030e-01, -1.4559e-01,  1.9728e-01,  2.1664e-02,
        -7.2541e-01,  3.1668e-02,  1.9453e+00,  1.9231e-01,  4.2623e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5692e-03,  1.3765e+00,  4.2720e-03, -1.5515e-03, -5.4529e-03,
        -5.3358e-04,  1.7594e-03, -2.6315e-04, -1.4180e-02, -7.9365e-03,
        -1.3374e-03,  5.2302e-03, -3.8405e-03,  2.9666e-03, -2.9929e-03,
        -1.9948e-03, -1.4627e-03,  3.3880e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4922e-02,  1.6352e+01,  8.2776e-01,  4.2288e-02, -1.5901e-02,
        -1.0551e-02, -8.8040e-03, -2.4242e-02, -3.5847e-02,  4.4671e-03,
        -6.1651e-03, -1.0160e-02,  1.9373e-02, -5.4430e-03, -8.9370e-03,
        -3.5005e-02,  1.4561e-02,  2.8041e-03,  7.4934e-03,  1.0211e-03,
         1.0692e-02, -1.6546e-02,  2.1566e-02,  3.6058e-03,  1.7423e-02,
        -2.3857e-02, -4.9284e-02,  2.8298e-03, -1.4419e-02, -2.9389e-02,
        -1.7296e-02, -7.5094e-03,  1.8938e-02, -5.2050e-02, -7.5943e-03,
        -3.2767e-02,  7.9779e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0794e+01, -2.4169e+02,  5.1564e+00, -5.8050e+00,  2.5978e-01,
         2.1535e-01,  1.8804e-01,  5.0309e-01, -4.7435e-02, -4.6620e-01,
         2.8401e-01,  1.0515e+00,  2.1274e-01,  7.9522e-01,  4.7171e-01,
        -1.4145e-01, -3.9350e-01,  8.3151e-01,  2.2459e-01, -7.4954e-03,
         3.8056e-01,  1.5319e-01, -5.4689e-01,  6.6847e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7509e-02,  1.3695e+01,  6.3356e-02,  1.4651e-02, -3.3725e-03,
         3.0879e-02, -1.4662e-02,  3.6959e-02,  3.0881e-02, -8.9144e-03,
         1.5438e-02, -7.6901e-03,  5.0136e-03, -1.6714e-02,  1.6412e-02,
        -1.9977e-03, -3.3002e-02, -2.0880e-02,  1.1322e-02,  1.6380e-03,
         1.5761e-02, -3.6598e-02, -1.4560e-02, -3.5359e-03, -4.6588e-03,
         2.9946e-02, -1.2217e-02, -2.4778e-04, -1.4080e-02, -5.9406e-03,
         1.3787e-02, -8.1001e-03, -1.0305e-02,  8.1265e-03, -1.7545e-02,
         4.2903e-02, -7.3060e-03, -2.2662e-03,  6.0958e-03, -1.0422e-02,
        -6.8967e-03,  4.0291e-03,  1.5064e-02,  5.3992e-02,  2.6884e-02,
        -1.1061e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3769e-04,  2.9244e+00, -1.9374e-02, -8.1055e-03, -2.1882e-02,
         1.6513e-03, -5.2136e-02, -2.8582e-03,  1.0347e-03, -1.0937e-02,
         3.2742e-03, -2.9912e-03, -1.0098e-02,  1.3382e-02,  6.3103e-03,
         6.9459e-03,  4.3053e-03,  2.0933e-03,  2.2910e-04, -1.6925e-03,
         5.2023e-03, -4.3531e-02, -1.8765e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0928e-01,  3.0324e+01, -3.2500e-01, -1.1752e-01,  1.9818e-03,
         1.5804e-02, -1.4850e-02, -1.0541e-01, -5.9950e-02, -1.4329e-01,
        -4.0362e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3890e+00,  4.2503e+01, -2.4444e-01,  2.9791e-01,  2.7569e-01,
        -3.7138e-01, -2.4390e-01, -2.6382e-01,  6.9764e-02,  2.0156e-01,
         1.7841e-02,  2.6371e-02,  3.4869e-02,  2.7749e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6934, 60.2007, -0.1415, -0.4996, -0.1039,  0.0682, -0.5460,  0.4679,
        -0.3217, -0.0667, -0.0805,  0.1107,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3996e+00, -1.8350e+02,  1.8748e-01, -7.2450e-01, -3.3403e-01,
        -3.6069e-01,  1.2829e-01,  7.5543e-02, -1.6373e-01, -1.2153e+00,
        -2.4012e-02,  1.1618e-02, -1.5095e-01, -1.8625e-01, -2.1902e-02,
         3.0928e-02, -1.6800e-01,  6.7317e-02, -9.4884e-02,  2.8394e-01,
         1.8149e-01, -1.4025e-01, -7.0717e-01,  1.9766e-01,  4.4263e-01,
         2.1081e-01, -2.6417e-01,  1.9159e-01, -2.6199e-02, -3.0843e-01,
         5.3257e-02, -4.7901e-01,  4.4998e-02, -1.5280e-01,  4.6051e-01,
         1.3867e-01,  1.2611e-01, -9.1588e-02, -1.3789e-03,  8.4541e-02,
         2.1402e-02, -1.1462e-01,  2.5551e-01, -3.2383e-02, -1.9444e-01,
        -1.8698e-02, -1.0988e-01, -4.0542e-02, -2.6054e-02,  4.9439e-02,
         1.5193e-01, -2.9821e-02,  1.5637e-01, -1.6816e-01,  3.1223e-01],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.5711e+00, -1.9204e+02, -8.0732e-01,  1.2115e+00,  3.0917e-01,
        -9.4120e-01, -5.5695e-01, -6.4460e-01, -6.3679e-01, -4.3296e-01,
         1.6198e+00,  3.2943e-01, -1.4438e+00,  4.8277e-01,  4.6859e-01,
        -2.2682e-01,  3.9743e-01,  3.2631e-01, -5.5551e-01,  9.7830e-02,
        -7.6830e-01, -1.4322e+00,  2.0159e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2357e+00,  4.4179e+01, -3.5444e-01,  9.8016e-03,  2.0705e-01,
         7.0711e-02, -1.1541e-01, -3.7362e-02, -6.8792e-03, -1.7428e-01,
        -1.1500e-01,  1.9261e-02, -2.1926e-01,  7.9490e-02, -8.4143e-02,
         4.7779e-03, -1.9761e-01,  3.8152e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0214,  4.9788,  0.0244,  0.0075,  0.0666,  0.0153, -0.0358, -0.0058,
         0.0054, -0.0262,  0.0129,  0.0120,  0.0277, -0.0063,  0.0169, -0.0292,
         0.0167, -0.0541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6343e-02,  7.5411e+00, -3.0284e-02,  3.7453e-02,  3.3142e-02,
        -3.2867e-02, -1.8692e-03, -3.5539e-02,  2.3146e-02,  2.7969e-02,
        -2.7462e-03,  1.8705e-02,  3.7187e-02,  4.8433e-02, -5.4254e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6303e-02,  2.3807e+00, -2.9980e-02,  8.9020e-03, -3.5220e-03,
        -1.2413e-02, -8.8223e-03, -8.9486e-04, -1.1224e-02, -7.5696e-03,
         4.1251e-03, -1.8227e-03,  1.3632e-03, -1.2803e-03, -5.3396e-03,
         6.4552e-03, -1.2192e-02, -1.1940e-03, -3.4176e-04,  1.4155e-02,
         1.2048e-03,  6.5967e-03, -2.7957e-03, -2.9494e-03,  5.8287e-03,
         3.5453e-03, -1.6421e-04,  4.9845e-05,  1.2614e-02,  6.2047e-03,
        -2.8847e-04, -2.2248e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0169e-03,  6.0042e+00, -1.0687e-01, -2.3067e-02,  9.0997e-03,
         5.7532e-03,  3.5953e-03,  2.7045e-03, -5.2749e-03,  1.1009e-02,
         1.1624e-02,  3.6763e-03,  4.3396e-03,  2.5480e-03,  3.5119e-03,
        -1.5823e-02,  2.9141e-03,  1.2117e-02,  8.1244e-03,  1.1014e-02,
         3.6685e-03,  2.5482e-02, -1.7492e-02, -2.4285e-02,  1.2058e-02,
         8.5361e-03,  1.1116e-02,  1.4656e-02,  1.3891e-03,  1.0689e-02,
        -3.2878e-02,  6.8445e-04, -3.2226e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7214e-01,  1.2219e+02,  4.9063e-01,  1.5789e-01, -3.3102e-01,
         5.0683e-02,  1.3702e-01, -2.4757e-01,  2.4784e-01,  2.9484e-01,
        -3.4623e-02,  1.4011e-01, -1.4498e-01, -2.6325e-01, -1.6277e-01,
        -1.5298e-01,  1.5791e-01,  1.2750e-01, -2.7318e-01, -6.6023e-02,
        -1.0545e-01, -2.0356e-01,  6.3096e-02, -8.3269e-03,  5.8312e-02,
        -4.8540e-04, -9.9361e-03, -8.5081e-02, -4.5029e-02, -4.0226e-01,
         2.9410e-01, -4.5327e-02,  1.3356e-01, -6.5225e-01,  4.4229e-02,
         2.0036e-01,  1.0379e-02, -1.5710e-01,  1.5248e-02, -8.2178e-02,
         1.9649e-02, -1.8611e-01,  3.6285e-02, -3.5994e-02,  7.6727e-02,
        -2.4728e-01, -4.5423e-02,  1.2857e-02,  1.8047e-02,  1.7848e-02,
         1.3644e-01, -1.0874e-02,  7.3578e-02,  9.1951e-02, -3.4219e-02,
         2.0333e-01,  1.7677e-02,  7.1780e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4057e-01, -7.4883e+01,  3.2819e-01, -2.3063e-01,  1.7054e-01,
         1.8858e-01, -1.1078e-01, -2.9779e-02, -1.2205e-01,  1.1002e-01,
        -2.4322e-01, -1.5122e-01,  2.8823e-02,  3.3390e-01, -3.7911e-02,
         8.1009e-02, -1.0999e-01, -1.2887e-01,  6.2213e-02,  1.1730e-01,
         4.0263e-02,  1.7237e-01,  1.0948e-01,  2.8909e-01,  9.2924e-01,
        -4.0751e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3129e-03,  3.1246e-01,  3.6519e-04,  2.5272e-03,  1.8923e-05,
         1.8573e-03, -7.5949e-04,  5.3099e-05, -9.7100e-04, -6.5230e-04,
         8.4810e-04,  5.9983e-04,  3.0654e-04, -3.3539e-04,  1.9539e-04,
        -8.7170e-06, -1.1338e-04,  2.0485e-04, -4.9450e-04,  5.7480e-05,
         1.0738e-03, -3.8401e-03, -1.0522e-04, -4.7334e-05, -4.0161e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0905e+00, -1.0913e+02, -2.4837e-01, -2.3620e-02, -2.4479e-01,
         5.4670e-02, -9.2628e-03,  1.1752e-01, -2.2514e-01,  4.9398e-02,
         1.8557e-01, -2.9017e-02,  6.1924e-02, -5.3669e-01,  1.5178e-01,
         1.0637e-01,  4.0507e-01,  1.6355e-01, -7.3228e-02,  1.4143e-02,
         1.4635e-01,  5.0510e-02, -1.2605e-01,  3.4025e-01,  9.2113e-02,
         1.0445e-01,  2.7081e-02, -3.7297e-02,  4.2586e-01,  5.4731e-02,
        -1.8532e-01,  1.2426e-01, -2.8934e-01,  6.8911e-02, -2.2929e-01,
         1.8139e-01,  2.0334e-02,  1.2986e-01,  8.2381e-02, -3.0503e-01,
         1.7534e-01, -6.1809e-02, -9.5095e-02, -3.9064e-01, -6.5886e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3097e-02,  4.5706e+00, -1.3477e-02,  7.5858e-03,  5.0761e-04,
        -2.3749e-02, -3.1262e-02,  4.6874e-03, -8.5334e-03,  9.6352e-03,
        -2.3272e-02,  3.4836e-03, -1.4537e-03,  2.0989e-03, -1.5744e-02,
        -4.7967e-03,  2.2559e-02, -3.8268e-03, -5.0915e-03,  8.2018e-03,
        -1.8756e-03,  1.9157e-02,  7.6692e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4630e-01,  2.2463e+01, -9.0776e-02,  3.6434e-02, -7.0748e-03,
         3.1257e-01,  7.7496e-02, -3.7250e-02, -4.5790e-03, -1.2706e-02,
         1.8687e-01,  9.2378e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 4.2969e-02,  5.9743e+00, -1.1794e-02, -8.9422e-03, -4.9453e-02,
        -1.0184e-02,  1.7606e-02, -4.7034e-03,  1.9639e-02, -1.9055e-02,
         6.0284e-03, -1.7390e-02,  4.5958e-03, -2.7784e-02, -1.1573e-02,
         1.2461e-02,  1.0100e-02,  1.8327e-03, -7.3932e-03,  8.4213e-03,
         1.1965e-02,  1.4975e-02, -1.3769e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5522e-02,  1.4479e+01, -7.1066e-02, -1.8355e-02, -4.9572e-02,
        -1.4299e-02,  5.1308e-02,  4.3823e-02, -1.0646e-02,  1.2529e-02,
         5.7907e-03, -3.1979e-02,  2.0420e-02,  2.9986e-03, -5.0866e-03,
         1.2591e-02,  3.4988e-02, -1.1570e-02,  3.5204e-02,  2.3026e-02,
         2.0395e-02, -1.7943e-02, -2.6230e-02,  4.3014e-03,  1.7849e-02,
        -1.0819e-02, -4.1557e-02,  3.1572e-02, -1.7605e-02,  3.8339e-03,
         2.6309e-02,  2.3207e-01, -6.1205e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0908e-01,  2.2963e+01,  4.2491e-02, -7.9517e-02,  3.5871e-02,
         3.6456e-02,  9.6327e-02,  3.8185e-02,  2.8851e-02, -3.0528e-02,
         9.4066e-02,  1.6391e-03,  4.6759e-02,  8.4402e-02, -1.4912e-02,
        -5.2279e-02,  1.7396e-02, -8.6087e-02, -1.9772e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0361e-01,  3.1894e+01,  7.0577e-02, -2.6510e-02, -1.1396e-02,
         1.4179e-01, -1.3279e-01, -4.2847e-02,  1.8185e-01,  1.0037e-01,
         4.0869e-02,  1.0929e-01,  5.0457e-02, -2.5777e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2719e-01, -1.3055e+02, -2.1182e+00,  3.7117e-01,  3.9399e-01,
         8.7766e-02, -7.2020e-02, -9.3580e-02, -2.1017e-01, -7.6937e-02,
        -2.8956e-01, -3.7308e-02,  1.0607e-01, -4.5000e-01, -3.1688e-02,
         8.2098e-02, -3.1271e-02,  7.4811e-03,  3.8870e-01, -1.7135e-01,
         4.3062e-01, -5.1793e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3669e-01,  9.7080e+00,  8.9096e-04, -4.4343e-02, -3.9783e-02,
        -2.0875e-02,  4.4582e-03, -3.8178e-02, -8.3202e-03, -3.2488e-03,
        -1.6801e-02,  1.7139e-02,  5.8455e-02,  8.9881e-03, -1.0238e-02,
        -5.1883e-02, -1.1126e-02,  1.7466e-02,  9.8186e-03, -9.5622e-03,
        -3.2668e-03,  1.1715e-02,  5.7442e-04, -1.3204e-02, -2.8944e-02,
         4.7904e-03,  1.2292e-02, -1.0730e-03,  8.3564e-04,  2.4284e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9776e-02,  3.3188e+00,  1.0615e-02, -1.2487e-02, -1.1926e-02,
         3.0897e-03,  1.9821e-03, -6.1926e-03, -5.4197e-03,  3.0210e-03,
        -9.3826e-03,  4.1620e-03, -9.8093e-03,  7.8710e-03,  1.0894e-02,
        -1.9632e-02,  1.0611e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2581e-02,  5.2689e+00,  2.7300e-02,  1.8237e-01, -4.3832e-03,
        -2.7813e-03, -3.2576e-02,  3.8119e-02, -2.8435e-02, -1.8256e-02,
        -4.9839e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7863e+00, -1.0894e+02, -1.4715e-01,  1.7135e-02,  1.0029e-01,
        -7.4206e-01, -2.0421e-01, -1.4477e-01, -3.9720e-03,  1.4330e-01,
         1.6976e-01, -2.4160e-01,  1.2824e-01,  3.6656e-01, -2.3302e-01,
        -2.1209e-01,  6.7872e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4623e-02,  2.3796e+00,  8.7503e-03,  1.8334e-02, -2.4714e-03,
         6.0433e-03,  4.3364e-03,  5.7671e-04,  3.1419e-03,  2.7512e-03,
         2.1909e-03, -1.2567e-03, -6.3744e-03, -4.5853e-03,  6.3382e-03,
         1.1666e-03, -3.3429e-03,  7.5940e-04, -1.4745e-02,  6.6548e-03,
         8.3244e-04, -8.9777e-03,  5.3482e-03, -5.1065e-05, -6.0860e-03,
        -1.0910e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3198e-01,  8.1852e+00,  2.4420e-01, -1.1616e-02,  8.0097e-03,
         2.7288e-03,  1.8196e-02,  4.4196e-02, -2.1841e-03,  2.3738e-02,
        -1.6694e-03, -7.5429e-03,  6.1533e-03, -1.4124e-02, -3.1309e-03,
         2.6017e-03, -7.2607e-03, -1.3651e-02,  1.0966e-02, -1.1025e-02,
         5.2694e-03, -2.1336e-03, -5.6337e-03, -1.7766e-02, -1.3003e-03,
        -1.0169e-02,  2.2188e-02, -3.1187e-02, -2.4388e-02, -1.4768e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5003e-01,  1.1704e+01, -7.3189e-02, -4.2018e-02,  3.2328e-02,
         2.3917e-02, -9.8291e-03, -4.2263e-03,  2.7569e-03, -5.7272e-02,
        -1.3281e-02, -1.3292e-02,  6.4092e-02, -3.5912e-02,  8.7267e-03,
        -1.7595e-02,  2.0093e-02,  6.5171e-03,  3.6909e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-5.7137e-02,  2.6916e+00,  9.5871e-03, -2.3636e-03,  2.0087e-03,
        -3.8665e-03,  1.7543e-02,  6.0495e-03,  2.9866e-03,  1.1598e-02,
         1.4637e-03,  3.2223e-03,  3.0958e-03,  1.1013e-03,  2.2354e-02,
         1.0523e-03,  5.1041e-03,  1.4591e-02,  1.9390e-02,  9.2891e-03,
        -7.9104e-03,  8.1378e-03,  2.7336e-03,  9.7371e-03,  1.9869e-03,
         1.7126e-02,  2.7855e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2024e-04,  2.1002e+00,  2.3161e-02, -1.4513e-02, -7.0143e-03,
        -6.7992e-03, -3.8612e-03, -2.6263e-03, -2.9111e-03, -2.6164e-03,
        -1.1791e-02,  7.1323e-03, -1.1878e-03,  2.3799e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8597e-03,  4.7271e-01,  5.4617e-03,  1.9647e-03,  1.9120e-03,
         1.6748e-03,  3.2460e-04,  9.8054e-05,  5.3204e-05,  1.4111e-03,
        -3.0345e-04,  1.4926e-03,  9.8621e-04, -1.2327e-04,  1.1043e-03,
        -2.8346e-04,  2.7513e-04,  4.0596e-04, -6.5079e-04,  1.1223e-03,
        -5.7655e-04, -6.9091e-04,  1.0871e-04,  6.2152e-04, -3.5789e-03,
        -3.6562e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([   1.7936, -110.1345,   -1.1047,    0.6742,    0.1367,    1.5402,
           0.4556,   -0.8257,    0.3427,   -0.3798,   -0.2551,    1.4926,
           0.1919,    1.0418,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7875e-01,  3.3656e+01, -1.4647e-01, -1.0101e-01,  6.0053e-03,
        -2.1396e-02,  1.0101e-01,  2.9766e-03,  7.4983e-02,  2.4087e-02,
         8.0822e-02, -1.6086e-03,  1.0663e-01,  2.7644e-02,  8.1220e-02,
         2.0834e-02,  4.2724e-02, -1.5962e-03,  6.1816e-02, -3.5590e-02,
        -1.2944e-02,  9.0162e-02,  3.0626e-02,  6.2633e-02,  9.0284e-02,
         5.2489e-02,  1.4988e-03,  6.7082e-02, -2.5714e-02, -3.3219e-02,
         1.0701e-01,  9.7628e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0022e-02,  7.7402e+00,  1.3368e-02,  4.2444e-03, -4.4812e-02,
         3.5328e-03,  1.4241e-02, -1.6411e-02,  4.5904e-03, -2.1514e-02,
        -6.0681e-02,  2.0524e-02,  2.0153e-02, -9.6153e-03,  4.5227e-03,
         5.0649e-02,  1.0884e-02, -2.4632e-02,  1.4183e-02,  2.2442e-02,
         1.3300e-03,  1.3555e-03,  1.6665e-03, -1.1982e-01,  2.5773e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2413, 11.8274,  0.0899,  0.0142,  0.0215,  0.0503,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2981e-03,  4.5868e-01, -1.1465e-03, -5.4406e-04,  8.6210e-04,
        -1.9680e-03,  7.7219e-05, -3.0792e-04, -8.5028e-04,  9.8179e-04,
        -1.1340e-03, -3.4066e-04,  5.6400e-05,  3.4276e-03, -1.2826e-04,
        -4.4614e-06, -1.5797e-04,  5.9947e-04,  1.3645e-03,  5.5331e-04,
        -1.5803e-03, -4.0018e-04, -7.2732e-04, -4.5050e-04, -3.3021e-04,
         9.9318e-04, -5.1072e-05,  1.0179e-03, -2.6723e-04,  2.7036e-05,
         4.4640e-04, -1.7462e-03, -8.6652e-04, -1.3933e-05,  3.2460e-04,
        -1.8470e-03, -8.4747e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2998e-01,  4.6489e+01,  5.6384e-01,  1.3000e-01, -7.3743e-02,
         1.1496e-01,  1.8418e-02,  9.1764e-02, -3.7754e-02,  2.4082e-02,
        -9.8043e-02,  1.6510e-01, -1.7370e-02, -5.2879e-02, -1.0616e-01,
        -8.5054e-02, -1.8951e-02,  2.7108e-01, -3.3507e-02,  2.1971e-01,
        -4.1763e-01, -6.1704e-02,  6.8564e-02,  1.1195e-03, -1.1688e-02,
        -3.3823e-01, -3.6498e-01, -2.0894e-01,  2.8180e-02, -7.6673e-02,
        -1.8183e-02,  1.5373e-02,  1.0219e-02,  5.6914e-02,  1.9554e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7887e+00, -2.2298e+02,  3.3106e+00, -1.5432e+00,  3.4098e-01,
        -1.2177e+00,  7.3687e-01,  4.7219e-04, -1.7251e-01, -8.4030e-02,
         4.4565e-01,  1.5855e+00,  8.9110e-01,  2.2138e-01,  2.3072e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0352e-01,  1.0513e+02, -1.6364e+00, -1.0341e-01,  6.5526e-01,
        -1.5093e-01, -2.1797e-01,  1.3104e-01,  2.5911e-01,  1.4163e-01,
        -1.5906e-01, -1.5153e-01,  6.3472e-03, -2.0855e-01,  7.3358e-02,
         1.3498e-01,  1.8611e-01,  6.7282e-02,  2.0514e-01,  8.8221e-02,
        -8.4638e-02,  1.7762e-01, -2.1864e-01,  1.0560e-01, -8.9948e-02,
         7.7976e-03,  1.8027e-01, -4.5993e-03, -4.8169e-02,  1.7660e-01,
        -2.9920e-02,  1.0514e-01,  4.2035e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0393e+00,  1.8546e+02,  8.4612e-01, -7.6687e-01, -3.6099e-01,
         2.6407e-01,  3.0782e-01, -2.7133e-01, -1.2589e-01,  2.0081e-01,
         2.5715e-03, -3.7452e-01, -7.5970e-01,  8.1714e-01, -1.3071e-01,
         1.4387e-01,  5.6045e-01, -3.9022e-01,  8.4884e-03,  3.9583e-01,
         8.9743e-01, -1.7619e-01,  8.7807e-02, -6.2709e-02,  2.7756e-01,
        -2.2712e-02,  8.1023e-01, -1.8238e-02,  1.6407e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 6.7178e-02,  3.2930e+00,  1.8498e-02, -1.1208e-02,  9.8611e-04,
        -8.6160e-03,  5.0974e-03, -2.0978e-02, -7.8614e-03, -1.8552e-02,
        -1.0387e-02,  1.2407e-02,  4.5808e-03, -4.7855e-04, -1.3821e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9989e-02,  1.2472e+01,  1.7003e-01, -1.3884e-02, -2.0459e-02,
         9.3826e-03,  9.1278e-03,  1.1001e-02,  1.0764e-03, -2.9922e-02,
         2.3111e-02,  8.2291e-03,  1.3556e-02,  2.9959e-03, -2.4074e-03,
        -1.5449e-02,  7.0212e-05, -2.9711e-03,  6.0678e-03,  1.6629e-02,
        -2.6520e-04, -4.3264e-03, -8.9810e-03,  2.2267e-02,  1.9267e-03,
         1.4567e-02,  2.0967e-03, -1.1386e-02, -2.0825e-02, -3.2675e-03,
         1.4201e-02, -7.4757e-03, -1.6114e-02,  6.2961e-03,  1.2922e-02,
         1.5879e-02,  1.1138e-02,  1.5685e-02,  4.8167e-02, -2.6954e-03,
         1.2283e-03,  7.6184e-03,  4.0092e-03, -1.2402e-03,  6.3401e-03,
        -1.2343e-02,  1.4701e-02, -3.0080e-02, -7.9559e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7938e+00, -2.6809e+02, -2.2297e+00,  1.4410e+00, -4.5886e-01,
        -2.8533e+00,  6.7126e-01, -1.1413e-01,  1.4251e+00, -4.3091e-01,
         1.0001e-01, -2.4096e-01,  8.9636e-01,  3.9602e-01, -9.9211e-02,
         3.0238e-01, -7.7364e-01,  1.0739e-01, -1.0915e-01, -7.8591e-01,
         4.0703e-01,  2.7305e-01,  4.9732e-01, -2.0366e-02,  3.0872e-01,
         9.7314e-01, -2.8082e-03,  9.7236e-02, -8.8382e-02,  7.9091e-01,
         1.0267e-01, -6.2629e-01,  3.9729e-01, -2.2560e-01, -2.6548e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2998e-02,  1.4552e+01,  2.3964e-01, -9.7819e-03,  2.1177e-02,
        -2.7960e-02, -5.0339e-02,  2.2102e-02, -1.7742e-02, -2.8237e-02,
        -3.2218e-02, -3.3549e-02, -2.6290e-02, -2.7553e-02,  1.2706e-01,
         1.0075e-01,  4.8043e-02, -3.5185e-02,  2.3996e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4794e-02,  1.9554e+01, -2.2129e-01,  7.3802e-02, -2.2536e-02,
        -6.1195e-02, -3.5130e-02,  1.1003e-02,  1.6911e-01, -6.4102e-02,
        -1.2116e-01, -3.4899e-02, -1.2878e-03, -2.1230e-01, -5.9543e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4878e-02,  1.3167e+01, -6.7411e-02,  4.5289e-02, -7.5525e-03,
        -2.2828e-02, -6.9708e-04, -9.5504e-03,  1.1706e-02, -2.4255e-02,
        -2.6216e-03,  2.8417e-02, -8.3420e-03, -1.2594e-02,  1.8291e-02,
        -2.3697e-02, -3.1001e-04, -2.1047e-02,  7.6761e-03,  1.4919e-02,
        -3.8354e-02,  2.5583e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4646e-03,  1.9195e+00,  6.5087e-03, -8.2359e-03,  1.5222e-03,
        -2.8217e-03, -5.2957e-03, -7.6291e-04, -1.7265e-03, -6.2842e-03,
        -1.6400e-03, -2.9642e-04,  1.1923e-03,  1.4592e-04, -4.2592e-05,
         1.9130e-03,  2.8923e-03,  5.4525e-03,  4.9129e-04, -2.3176e-03,
         4.6817e-04, -5.7862e-04, -4.4665e-03,  3.1151e-03,  4.9302e-03,
         1.6407e-03, -3.5451e-03,  1.5708e-03, -7.8561e-05,  9.4783e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.2110e-02,  4.7263e+00,  2.7295e-03, -1.2775e-02,  1.1031e-01,
        -1.6954e-02,  1.9189e-02,  2.1730e-02,  2.4627e-02, -5.2988e-03,
        -1.9894e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5124e-02,  6.6027e+00,  4.1624e-02,  1.8345e-02,  1.2783e-02,
         6.5289e-03,  4.9605e-02, -8.2838e-03, -1.4827e-02,  3.6587e-02,
         9.8257e-03,  3.7627e-02, -1.7822e-02,  2.6658e-03, -4.3986e-03,
         2.6879e-02, -1.0123e-02, -7.5848e-04,  5.2233e-03, -6.1411e-03,
        -1.2636e-01, -6.5595e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2776e-01, -2.0179e+02, -1.6678e+00,  7.2507e-01, -1.2135e-01,
         8.8362e-01, -1.2569e-02, -1.2147e-02,  4.4363e-01,  1.0127e-01,
         2.9823e+00, -2.0055e-01, -2.2686e-01,  1.0111e-01,  2.8179e-01,
         1.7486e-01, -6.3977e-01,  2.3709e-01,  4.7966e-01,  1.8848e-02,
         2.6608e-01,  3.1142e-01,  7.3436e-01, -4.6455e-02, -3.8185e-02,
         1.2501e-01,  9.9366e-01, -2.4331e-01,  1.4114e+00,  1.5982e-01,
        -9.4907e-01, -1.1705e-03, -1.0318e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4359e-01, -1.6847e+02, -6.6986e-01,  4.8098e-01, -1.4967e-01,
        -6.0831e-01,  3.2980e-01,  9.4342e-01, -5.4822e-01,  1.3358e-01,
        -9.4609e-03, -2.2935e-01, -1.7102e-01,  4.8356e-03,  6.0162e-02,
        -1.2252e+00, -6.9771e-01,  2.8831e-01, -9.7900e-02,  2.7480e-01,
        -3.3097e-01,  1.0567e+00, -1.3326e-01,  1.4417e-01,  3.6169e-01,
        -8.5498e-01, -7.1524e-01,  6.2450e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1804e+00, -1.1715e+02,  8.4051e-01,  4.0814e-01,  2.7055e-01,
        -6.4179e-02,  1.3680e-01, -1.7870e-01,  5.8036e-01,  2.9682e-02,
         3.2680e-01, -4.9059e-01,  5.7062e-02, -2.8725e-01,  2.8244e-01,
         3.5350e-02, -2.9118e-01, -1.4672e-01, -1.3407e-04,  5.6234e-01,
        -2.1310e-01,  7.0316e-02,  3.6158e-01, -6.5899e-01,  5.7337e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 2.6425e-01,  1.6690e+01, -2.6370e-02, -1.7343e-01,  7.1932e-02,
         1.9038e-02, -8.6710e-02,  1.5071e-02, -1.3212e-02,  3.3691e-02,
         5.7131e-02,  3.0266e-02, -5.2514e-03,  5.4077e-02, -1.3003e-02,
        -7.6648e-02,  2.2118e-03,  1.8259e-02,  1.9253e-02,  3.9805e-02,
         1.0624e-02, -4.9754e-03, -3.8754e-02, -6.6857e-03, -4.2232e-03,
        -1.0744e-02,  6.2543e-02, -1.9289e-02, -2.1724e-02, -8.1262e-03,
        -7.2842e-02, -2.7101e-03, -7.4692e-02, -1.6138e-02,  1.8841e-03,
         1.2773e-02,  3.5540e-03, -4.7314e-02,  3.6500e-02, -1.8239e-02,
         2.3767e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2416e+00,  6.8311e+01, -1.0453e+00, -4.2398e-01,  1.9331e-01,
        -2.4799e-01, -8.3872e-02, -5.5192e-02, -2.4722e-01, -6.2724e-03,
        -2.2423e-01,  5.1585e-02,  1.8068e-01,  2.7607e-02,  3.1873e-01,
        -3.0358e-01,  2.9552e-01,  1.2937e-01,  3.1589e-02,  2.8691e-01,
        -7.9245e-02, -1.4355e-02, -5.6374e-02,  1.2643e-01,  1.8736e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6537e-01,  3.9044e+01, -8.1711e-02, -7.8325e-02, -5.2057e-02,
         2.1342e-01, -1.0407e-02,  3.3141e-01, -7.7534e-02,  4.1706e-02,
         9.4100e-03,  1.6977e-01,  1.1152e-01,  8.2293e-02, -2.2017e-02,
         1.6966e-02,  2.0001e-02,  2.2277e-02,  4.5713e-02,  6.5973e-03,
         6.6733e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5978e-03,  7.5799e-01, -1.9292e-03, -4.4695e-03, -3.2098e-04,
        -2.9358e-03,  1.9320e-04,  1.3962e-03, -1.1768e-03, -1.3909e-06,
         3.9123e-03, -8.0732e-04, -1.5741e-03, -1.4244e-03, -2.8043e-03,
         4.1368e-03,  7.1484e-03, -1.2787e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3597e-02,  2.2328e+00,  1.6026e-02, -3.6795e-03,  5.9170e-03,
         4.2064e-03,  5.6153e-03, -2.5144e-03,  1.5104e-02,  6.8978e-03,
        -2.8978e-03, -2.6163e-03, -9.8900e-03,  1.6695e-03, -2.0733e-03,
         1.0932e-03,  1.7694e-02,  2.3644e-03, -9.1306e-05, -5.0078e-03,
         4.3649e-03, -3.1139e-03,  8.8373e-04, -2.9813e-03,  7.4546e-04,
        -3.3293e-03, -9.8670e-04, -1.2685e-03, -2.9325e-04, -1.1858e-03,
         3.8481e-03,  5.9221e-03, -5.1911e-03,  3.1501e-03,  2.4465e-03,
         5.1233e-04, -7.9037e-03,  9.7660e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2170e-01,  5.9263e+01, -8.3481e-01, -2.6171e-01, -2.5657e-01,
        -7.0506e-03, -2.5081e-01, -1.1841e-01,  3.8518e-02, -6.7033e-02,
         2.4935e-01, -2.2432e-01,  5.9690e-02,  4.8122e-02, -1.9208e-02,
        -1.5010e-01,  3.6999e-01, -3.8338e-01,  3.8462e-01,  2.3685e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1108e-01,  1.7393e+01, -1.6171e-01, -1.5657e-01, -7.1760e-02,
        -1.6749e-02, -1.7174e-01,  6.9933e-02, -7.5416e-04, -2.0937e-02,
         2.8527e-02, -1.6110e-01, -2.6569e-02,  1.1979e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4779e-01,  1.0363e+01, -3.0626e-02,  2.5568e-02, -1.9735e-02,
        -1.7479e-02, -1.9195e-02,  5.0528e-03,  2.8460e-02,  1.0250e-02,
         2.0989e-02,  8.6330e-02,  1.3213e-02, -6.6947e-03, -9.6443e-03,
        -9.0260e-03,  6.4012e-03,  2.9432e-02,  1.8770e-02,  1.4185e-02,
         2.8585e-03,  2.9992e-02,  2.9214e-02, -4.6264e-03,  7.6125e-03,
        -2.0441e-02,  1.2191e-02,  2.4619e-02,  3.0073e-02,  2.9638e-02,
         8.9212e-03,  1.0082e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0475e-01,  6.0665e+01,  3.3934e-02,  8.7530e-01, -6.5293e-01,
        -5.2614e-02,  2.5738e-01, -1.8278e-01,  7.3303e-02,  1.8848e-01,
         1.0982e-01,  6.8649e-01, -1.6729e-02, -3.3227e-02, -1.4332e-01,
         5.0439e-02,  3.5002e-02, -4.3108e-01, -4.5745e-01, -1.1177e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1121e-02,  8.1601e-01,  7.7944e-04, -4.8687e-03,  2.1031e-03,
        -1.1854e-04,  6.4337e-05,  2.2494e-04, -2.5353e-03, -4.8757e-05,
         2.8419e-03,  3.4375e-04,  1.9208e-04,  4.7014e-04, -7.3408e-04,
        -3.1596e-04, -3.6173e-04, -4.7620e-04, -1.7989e-03, -1.4990e-04,
         1.1762e-03, -2.4469e-03, -1.6702e-03,  1.4806e-03,  2.5450e-03,
        -2.0406e-04,  2.0039e-03,  7.9110e-05, -3.5152e-03, -1.0844e-03,
         3.9885e-04,  1.2711e-03,  6.0550e-04,  2.0793e-03, -4.2000e-04,
         3.0627e-03, -2.1366e-04,  7.8024e-04,  7.9315e-04, -3.5648e-04,
        -1.3502e-03, -3.7220e-04,  1.5452e-03, -3.3264e-04, -2.6353e-04,
         2.7400e-03,  6.1094e-04,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8509e-01,  9.7128e+01,  7.4873e-01, -1.0389e-01,  1.1546e-01,
         3.0928e-02, -1.0055e-01, -2.7459e-01,  5.1619e-01,  4.3268e-01,
         1.8820e-01,  3.6305e-01,  2.2908e-01, -1.1637e-01, -1.7199e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7504e-02,  3.4817e+00, -1.5775e-02, -1.8476e-02, -1.5756e-02,
        -1.2964e-02, -3.7754e-03,  2.6759e-03, -3.8594e-03,  1.5298e-03,
        -5.9694e-03, -3.3715e-03, -4.1595e-03,  7.6537e-03, -7.2249e-03,
        -8.6393e-03, -1.1912e-02,  2.8606e-03,  3.1825e-03, -3.0338e-03,
        -1.3187e-03, -6.6982e-03, -7.1577e-04,  5.3396e-03, -8.0015e-03,
        -6.2628e-03,  2.9146e-03,  7.4463e-02,  7.0773e-03,  5.3047e-04,
        -9.1816e-04, -1.7714e-02, -1.0985e-03,  1.2122e-03, -7.8691e-05,
        -3.4941e-03, -2.2727e-03, -2.3839e-03, -1.9995e-03, -2.6561e-03,
         3.1980e-04,  1.1486e-03,  5.1213e-04, -4.1462e-03,  5.3865e-03,
        -2.4587e-03,  3.3989e-03,  1.7566e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 7.0058e-01, -2.2078e+02, -6.7177e-01,  1.7441e+00,  7.4205e-02,
         2.6286e+00,  4.3802e-01,  3.1028e-01,  9.7338e-02,  1.0631e+00,
         2.7565e-01,  7.0041e-01,  3.7261e-02,  2.9970e-01,  1.2574e-01,
         2.8088e-01,  5.6015e-01,  4.8436e-01,  7.7696e-02,  3.2807e-01,
         5.2197e-01,  1.3968e-01,  5.4355e-01, -9.2738e-02, -4.9371e-01,
         7.9562e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5426e-02,  1.5207e+01, -2.1834e-02, -1.4367e-02,  1.0744e-02,
        -1.1739e-02,  3.4526e-02,  5.0628e-02,  5.1233e-02, -9.8256e-03,
         9.1573e-02, -1.4221e-02,  1.2433e-02, -1.1375e-02, -8.7818e-03,
        -2.6854e-02,  1.0770e-01, -3.0906e-02,  2.8928e-02,  8.7143e-03,
         8.2493e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4977e-03,  9.1758e-01, -6.0494e-03, -2.9425e-03,  1.9740e-04,
        -7.0279e-03, -1.8722e-03, -1.5569e-03,  1.2063e-03,  8.4156e-05,
         9.2908e-04,  4.8569e-04, -4.9475e-04, -1.7349e-04,  2.8932e-03,
        -3.4091e-03,  2.3170e-03,  1.8475e-03, -1.1259e-03,  1.1297e-03,
        -1.3795e-03,  4.8487e-04, -4.7530e-03,  3.1044e-03,  1.5821e-03,
        -6.8533e-04, -2.1933e-03,  3.3314e-04,  4.7593e-04, -4.8689e-04,
        -3.7551e-03, -1.6361e-03,  2.2972e-03,  1.0850e-03, -2.8225e-03,
        -1.8887e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5752e-02,  7.9400e+00, -1.3463e-02,  1.5945e-02,  2.9473e-03,
         7.7265e-02,  1.8991e-02,  3.1584e-02,  4.2368e-04,  1.9147e-02,
         5.3328e-02,  1.7269e-02, -7.0202e-03,  1.4127e-02, -1.5661e-02,
         7.0209e-03,  2.8229e-02,  4.0439e-02, -3.1481e-02,  7.2854e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7332e-02,  1.4159e+01, -5.3558e-02,  3.0123e-02,  2.2401e-02,
         9.4111e-03, -3.3871e-02,  7.3267e-03,  1.2490e-03,  1.6392e-02,
         8.2951e-02, -8.7485e-02, -4.4822e-02, -3.4594e-02,  6.7577e-02,
         1.3331e-02, -4.3790e-03, -1.4354e-01,  4.5412e-03, -4.9663e-02,
         1.8981e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8033e+00, -1.5058e+02, -1.0382e-01, -9.0906e-01,  6.8648e-01,
         2.9548e-01,  3.3164e-01,  1.3229e-01, -1.6629e+00, -3.4619e-01,
        -2.0302e-02,  1.5005e-02, -6.9349e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8403e+00,  8.2312e+01, -8.4836e-02,  9.3498e-02,  5.6962e-02,
        -2.5866e-01, -3.6464e-01,  9.4472e-02, -1.1440e-01, -2.2290e-01,
        -2.4694e-01, -3.2969e-01,  2.2018e-02,  2.6611e-02, -3.4288e-01,
        -1.6494e-01, -1.8062e-01, -1.3302e-01, -1.4916e-01, -1.9386e-01,
         6.2434e-02,  1.9139e-01, -4.2692e-01, -2.2777e-01,  6.0754e-02,
         7.8507e-03, -4.1176e-01,  9.8417e-03,  5.8194e-02, -5.9542e-02,
         5.7345e-02,  5.4964e-03,  2.6175e-01, -1.0117e-01, -9.5831e-02,
        -2.6806e-02,  4.5302e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5813e-01,  3.2129e+01,  3.4267e-01,  1.8807e-01,  4.4777e-02,
        -1.1841e-02,  1.5894e-02,  4.9967e-03,  5.9998e-03,  1.4364e-02,
        -5.2460e-02,  5.8622e-02, -6.9423e-02, -4.0522e-02,  1.8087e-02,
        -8.9316e-02, -5.0167e-03, -4.9628e-02, -2.9073e-02, -1.1874e-01,
         4.4676e-02, -4.9482e-02, -9.1516e-02,  6.6723e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3407e+00, -1.1935e+02, -1.5956e+00, -5.5531e-01, -2.1209e-01,
         1.1996e-01, -1.4287e-02, -3.3545e-01, -2.0378e-01,  6.6414e-01,
         4.8244e-01, -3.5097e-01,  1.1441e+00, -2.2441e-01, -4.3761e-01,
        -2.1470e-01,  2.5796e-01,  3.1384e-01,  8.2640e-02, -1.1846e-01,
         1.4397e-01,  3.5856e-01,  3.9720e-02,  1.1528e-01,  3.9160e-02,
         1.3171e-01,  2.2776e-01,  1.0133e-02,  1.3134e-01,  1.8018e-01,
         8.7072e-02,  1.3048e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2039e+00,  4.5335e+01,  3.9452e-01,  1.3718e-01,  5.9537e-02,
        -7.0685e-01,  2.9362e-02,  3.3645e-02, -1.8100e-01, -7.6404e-03,
        -1.0808e-01,  7.6817e-02,  5.6603e-03,  8.7781e-02, -6.2102e-02,
         4.8913e-02,  9.9738e-02, -6.5954e-02,  1.3064e-01,  3.9428e-02,
         8.8796e-02,  1.6631e-01, -2.3528e-02, -7.1363e-02,  1.5880e-02,
         2.1454e-01,  1.0696e-01, -2.6743e-02,  4.4361e-02,  2.3216e-02,
         3.2744e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0053e+00,  1.5525e+02, -2.9822e-01, -1.3646e-01, -8.8876e-02,
         7.7930e-02,  9.9623e-01, -1.3228e+00,  2.8965e-01, -4.4822e-01,
        -7.0780e-01, -7.6340e-01,  2.8971e-01, -1.2396e+00, -3.7176e-01,
        -2.3594e-01,  2.3155e-01, -1.2842e-01,  6.0602e-02, -8.5610e-03,
        -1.1651e+00, -9.7130e-02, -1.2610e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5674e-02,  2.0119e+01,  2.2245e-01, -7.3233e-02, -8.1210e-02,
        -2.1371e-02, -4.9491e-02, -3.1058e-02, -6.3982e-02,  1.3746e-02,
        -6.9837e-03, -4.8534e-02,  5.8756e-04, -4.0740e-02, -4.0792e-02,
        -1.1585e-02, -5.2607e-02, -1.6330e-02, -8.1188e-03,  3.8535e-03,
        -3.0402e-02, -1.0111e-02,  4.1628e-02, -6.7497e-03, -2.2932e-02,
        -3.2946e-02,  3.8178e-02,  4.7701e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.1537e-02,  4.2226e+00,  3.2877e-02, -9.0582e-03,  1.2514e-03,
        -1.2025e-02,  3.3249e-03, -1.9564e-03, -1.8650e-02, -3.3010e-03,
         1.3128e-02, -9.9681e-03,  8.6578e-03,  3.0621e-03, -2.5439e-03,
         5.9845e-04,  4.7609e-03, -6.6402e-03, -1.4782e-03, -1.5208e-02,
         5.4129e-03,  5.4004e-05, -4.4948e-03, -5.3624e-03,  1.8520e-03,
         3.3406e-03,  1.1842e-02, -6.2911e-03,  5.6881e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3904e+00,  1.2061e+02, -6.8184e-01,  2.0793e-01,  1.5245e-01,
         5.6539e-01,  1.5765e-01,  3.1051e-01, -1.5717e-01,  5.1171e-02,
        -1.9103e-01,  4.8555e-02, -2.1670e-01,  2.3079e-01, -1.0877e-01,
         1.0232e-02,  1.4055e-01,  1.5608e-01,  5.9192e-01,  8.9038e-02,
         9.5320e-02,  8.4543e-01, -1.1310e-01, -9.3326e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1633e-02,  1.0250e+01, -6.6505e-02,  2.1271e-02, -5.6323e-04,
         5.9536e-02,  1.2747e-02,  2.9148e-02, -3.4104e-02,  1.3314e-02,
         1.7163e-02, -4.3790e-02,  7.9520e-02,  3.5134e-02, -6.1543e-03,
         1.4007e-02,  3.5052e-02,  1.0722e-02,  5.4075e-02,  1.5917e-02,
         1.9271e-02, -6.1981e-03,  1.8054e-02,  3.5152e-02,  2.3266e-02,
         4.8738e-05, -3.7879e-02,  1.8337e-03, -3.8063e-03,  1.1539e-02,
         1.2306e-02,  2.3497e-02,  5.7558e-03, -9.8813e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8393e-01,  1.0046e+02, -3.1311e-01, -3.5181e-01,  1.1235e-02,
        -1.5293e-01,  2.6787e-01, -1.5260e-01, -1.2373e-01, -2.3248e-01,
        -7.4323e-02,  4.6877e-01, -3.4271e-02, -6.9401e-02,  8.3204e-02,
         4.0847e-03, -4.3468e-01, -3.9216e-03,  9.5951e-02,  3.9360e-01,
        -1.1635e-01,  2.3217e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3215e-02,  9.5321e+00, -1.1693e-01,  1.4652e-02, -9.2089e-02,
         1.5360e-02,  3.4598e-03,  3.5608e-02,  2.4895e-02, -1.9299e-02,
         5.8965e-02, -2.0288e-02, -1.6351e-02,  2.5269e-02, -5.5025e-03,
         5.5964e-03,  1.2977e-01,  1.1173e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5760e-02,  1.5318e+00,  7.3558e-03,  2.6467e-03, -1.9643e-03,
        -3.6528e-03,  4.7625e-03,  4.5805e-03,  5.3751e-04,  2.0337e-03,
        -5.9613e-04, -8.5452e-04, -4.0872e-04,  2.8707e-04,  6.8917e-05,
        -1.4612e-03, -1.9452e-03, -6.2655e-03, -8.1786e-04, -6.2098e-03,
         2.5499e-03, -6.2735e-03, -1.6651e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8134e-01,  1.0844e+02,  8.3684e-01,  2.2834e-01, -5.2937e-01,
         3.2926e-01,  1.6842e-01, -6.7498e-02, -6.5998e-02, -1.2828e-01,
         1.3347e-01, -1.4672e-01, -1.1152e-01,  3.8560e-01, -2.3576e-02,
         8.1811e-02,  6.5224e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6071e-01,  1.8462e+01, -1.2671e-01,  1.4107e-01,  2.6245e-02,
        -4.1203e-02, -4.7062e-02, -5.5845e-03,  2.2061e-02,  1.5846e-02,
        -4.3749e-02, -6.4088e-02, -5.0475e-02, -3.2169e-02,  1.3492e-01,
        -1.1805e-01, -4.1603e-02, -2.2200e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6746e-01,  3.5625e+01,  9.8662e-02, -6.2305e-02,  4.4932e-02,
         1.0740e-03, -2.7810e-02, -2.8698e-01, -3.3963e-02,  5.1899e-01,
         1.3900e-01, -1.6828e-01, -3.5022e-02, -8.9346e-02,  1.6326e-02,
         7.3729e-02, -2.4422e-02,  3.3187e-01,  9.3466e-03, -2.7204e-02,
        -7.6255e-02, -5.0084e-02,  6.2384e-02, -9.7171e-03,  2.2396e-03,
        -1.8521e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0228e-01,  6.5784e+00, -4.0162e-02, -1.0913e-02, -5.1970e-02,
        -1.2314e-03, -1.1981e-02,  1.7066e-02,  4.8780e-02, -2.0902e-02,
        -9.2626e-03, -1.2029e-02, -9.3684e-03, -3.8335e-02, -1.2172e-02,
        -9.0997e-03, -1.0176e-02, -1.9290e-03,  5.9110e-03, -7.0906e-03,
        -8.4762e-03, -9.3803e-03, -1.3924e-02, -7.3517e-03, -9.9748e-03,
         3.1971e-03, -7.2994e-03, -5.9476e-03,  2.0415e-03, -9.5217e-03,
         1.4082e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1106e-03,  2.4637e+00,  5.1389e-05, -7.2149e-03, -2.9571e-02,
         1.6897e-03, -4.9961e-03,  1.3017e-02,  4.4547e-04, -1.9131e-02,
         7.1233e-03, -5.7022e-03, -6.8974e-03, -6.6793e-04, -3.4391e-03,
         3.2372e-03,  3.8217e-04,  2.5692e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3758e-02,  1.7560e+00,  1.1946e-02,  2.8318e-02,  1.2230e-03,
        -4.8393e-05, -5.4424e-03, -8.2853e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 8.2365e-02, -1.4014e+02, -7.5327e-02,  1.1845e-01, -7.3226e-02,
        -1.8629e-01,  1.2481e-01,  5.8191e-02,  3.3572e-01,  8.9921e-02,
         2.8412e-01,  7.2638e-01,  4.6352e-02,  1.5777e-01,  8.6780e-02,
         1.3982e-01, -1.7969e-01,  4.5288e-01,  1.0987e-01,  4.2908e-01,
         4.0687e-01, -1.7280e-02,  3.5450e-01, -7.3775e-02, -3.3931e-01,
        -2.3228e-01, -3.4492e-01, -4.7026e-01,  7.5255e-02,  3.2584e-02,
        -3.9316e-01, -1.2997e-01, -4.1421e-02,  7.9806e-02, -4.5029e-01,
         4.8991e-02, -2.8183e-02,  5.5109e-02, -1.1741e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7591e-03,  3.3734e+00, -4.0620e-02, -1.5845e-02, -2.6926e-04,
         4.4914e-05, -3.1796e-03, -4.5549e-03, -4.5751e-04,  1.1539e-02,
        -5.1785e-04, -5.5959e-03, -2.3950e-03, -9.5690e-03,  8.7528e-04,
        -1.7045e-02, -4.4679e-03, -2.1603e-03, -4.9249e-04,  2.0243e-03,
         4.3472e-02,  2.4523e-03,  1.5510e-02,  2.3309e-03,  8.1379e-03,
        -4.5174e-03, -1.2510e-02,  5.9180e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1353e-03, -5.3757e+01,  1.1402e-01,  1.0021e-01,  3.5420e-02,
         7.1414e-02,  2.3270e-01, -1.9964e-01, -2.5354e-01, -8.2566e-02,
        -1.0554e-01, -1.2315e-01, -4.5041e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9720e-01,  1.0083e+02,  1.9323e-01, -5.7495e-01,  1.1954e-01,
        -4.8354e-02, -4.3510e-01,  1.8329e-01, -3.3747e-02, -3.6856e-01,
        -3.3082e-01, -9.7192e-02, -3.9459e-02,  1.4452e-01,  1.7368e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5370e-01,  6.8537e+01,  1.2300e-01,  1.5561e-01, -3.0356e-01,
        -2.9536e-02,  2.4964e-01,  6.8735e-03,  6.3328e-02, -5.7459e-02,
        -7.3235e-02, -1.0133e-02, -3.7677e-01,  5.4936e-01,  5.2104e-02,
        -7.0579e-02, -8.1739e-02, -4.5410e-02,  1.9104e-02,  1.2597e-01,
         1.9870e-01, -4.1949e-02,  4.9486e-02, -2.3949e-02, -5.6383e-02,
         1.0400e-01,  1.2741e-01,  1.5911e-01,  5.9094e-02,  1.1390e-02,
        -2.5821e-01,  3.8788e-02,  9.3546e-03,  4.1948e-02,  1.8819e-01,
         5.5567e-02,  3.3683e-02, -1.7837e-02,  8.3536e-02,  4.5275e-02,
         1.6517e-01, -1.1926e-01,  1.1324e-01, -6.6323e-03, -1.0250e-01,
         2.1935e-02, -1.2697e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9480e-01, -1.5937e+02,  1.7448e+00,  7.4292e-01,  1.5796e-01,
         5.5230e-01, -8.9973e-03, -9.8145e-02,  5.1998e-01, -2.2918e-01,
        -4.3904e-01,  2.0767e-01,  7.4963e-02, -1.2051e-02, -1.2395e-01,
        -2.4495e-01, -1.7046e-01,  1.2580e-02,  1.7453e-01, -5.4226e-01,
        -4.7896e-02, -3.4354e-01, -2.6379e-01, -1.4572e-02,  1.3105e-01,
        -1.7122e-01, -8.6352e-01, -1.7823e-01, -1.5228e-01, -1.6771e-01,
         4.1121e-01, -2.0209e-01, -1.6924e-01, -1.1839e-02, -4.7397e-01,
         5.3432e-02, -4.8860e-02,  7.6767e-02, -1.8130e-01, -1.9101e-01,
         3.6626e-02, -6.0732e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5806e+00,  7.8640e+01,  2.3668e-01, -3.1056e-01,  1.9748e-02,
        -6.0106e-01,  1.7562e-01, -3.4130e-01, -6.6375e-02, -2.6398e-01,
        -7.0383e-01, -3.2358e-01, -1.0249e-01,  5.2460e-02,  2.3570e-01,
        -3.7372e-02,  2.4683e-01, -1.3713e-01, -1.7955e-02,  6.8904e-02,
         1.4591e-01, -3.8235e-02, -6.1811e-01, -1.2128e-01, -1.6741e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5216e-01,  8.5423e+00,  4.8387e-02,  1.2944e-02, -1.4019e-02,
        -5.6814e-02,  5.0861e-02, -8.8377e-03,  4.7536e-02,  1.7529e-02,
         1.2575e-02, -2.4007e-02,  7.7595e-03, -3.0648e-02,  5.1190e-03,
         4.8569e-03,  4.2085e-03,  3.5153e-02, -2.1877e-02, -9.2182e-03,
        -3.5252e-02, -2.3448e-02, -6.2296e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4702e-01,  5.3891e+01,  2.5320e-01, -3.6797e-03, -7.4934e-02,
        -2.8635e-01, -1.0644e-01, -3.2484e-02, -1.0563e-01,  9.3505e-02,
        -8.2978e-02,  3.1952e-02,  2.4201e-01,  6.3027e-02,  9.9880e-03,
        -1.4965e-01,  1.4383e-01, -6.0957e-02, -4.0442e-01,  4.8771e-02,
        -8.3846e-02, -1.0881e-01,  9.3102e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7337e-02,  6.9985e+00, -7.5230e-02, -3.7203e-02,  1.9298e-02,
        -1.7645e-02,  2.7765e-03,  8.7964e-03,  3.3471e-02,  1.8448e-02,
        -5.2849e-02, -7.0145e-02, -9.3991e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6200e-01,  2.1042e+02, -2.3357e+00,  5.3173e-01,  3.0313e-02,
        -1.6233e-01, -2.9907e-01,  8.2616e-01,  1.1858e+00,  3.6906e-01,
        -3.8715e-01, -1.3256e+00,  5.3002e-01, -4.9415e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4871e+00,  8.2229e+01,  1.4138e+00, -1.1980e-01, -1.8173e-01,
         8.7299e-02,  6.7125e-01, -2.5387e-01,  1.5021e-01,  5.3018e-01,
         1.0384e-01,  4.0490e-01,  1.2708e-01,  4.7703e-01, -1.3864e-01,
         2.4235e-02, -1.2061e-01, -4.8773e-02,  3.0329e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 8.2016e-01,  5.6863e+01, -3.1639e-01, -6.9968e-03, -6.1049e-02,
         3.9436e-02,  5.3714e-02, -8.6413e-02, -2.3817e-02, -6.4050e-02,
        -3.4453e-02,  1.8868e-01,  7.3251e-02,  1.4358e-01,  1.9729e-01,
        -3.3156e-02,  1.3804e-01, -9.6745e-03,  1.2016e-01,  3.5146e-01,
         1.0651e-01, -6.1903e-02, -1.2865e-01,  6.2070e-02, -1.6938e-01,
         1.9858e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1922e-02,  4.4915e+00, -9.1598e-03, -6.5406e-03, -1.5510e-02,
        -7.3393e-03, -3.1076e-02, -1.3271e-02, -3.3103e-02,  7.3536e-02,
         8.2040e-03, -9.7972e-02, -1.2895e-02, -1.1529e-02, -2.7599e-02,
         2.5046e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7169e-03,  6.8523e+00,  3.0018e-02,  1.2864e-01,  1.0521e-02,
         6.3282e-02, -2.0428e-03,  7.0820e-03, -3.2960e-02,  3.9450e-03,
        -1.9171e-02, -6.0870e-02,  5.4810e-03,  3.6102e-02, -1.6473e-02,
        -3.7666e-04,  4.7424e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5237e-01, -3.8534e+01,  1.6710e-01, -4.0848e-01,  3.1748e-02,
         2.1639e-01, -1.0039e-01,  3.0461e-02, -3.8940e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9660e-02,  2.6563e+00, -7.6600e-02, -1.2816e-02, -1.0952e-02,
         3.6680e-03,  3.1454e-03,  4.9886e-03, -1.8928e-02, -2.1450e-03,
         2.5822e-03,  5.8607e-03,  6.6645e-03,  8.0047e-04, -7.0622e-03,
        -1.7638e-03,  6.2348e-03,  7.3164e-03,  6.1142e-03,  7.7323e-03,
         6.6047e-03,  3.3358e-03,  6.7904e-03, -4.6054e-03,  5.4467e-03,
        -1.3922e-03, -2.6720e-03, -5.3410e-04,  1.9143e-03,  5.1388e-03,
         1.9894e-03,  4.2144e-04,  1.2777e-02,  5.3177e-03,  5.8978e-03,
        -1.0436e-04, -4.3101e-03,  5.6151e-03, -6.6064e-04, -3.2411e-03,
        -4.9759e-03, -1.8424e-02, -4.4476e-03, -8.9406e-03, -7.4385e-04,
        -4.3109e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0205e-03,  1.5255e+01, -5.9198e-02, -6.7041e-02,  2.6909e-02,
         8.9088e-03,  5.9841e-04, -1.6521e-02,  2.4375e-02, -1.1384e-02,
        -1.5926e-03, -6.2196e-03, -8.7408e-02, -1.4555e-02,  5.9045e-03,
         4.8657e-02,  1.8360e-02,  4.8423e-02,  7.7181e-03,  1.5437e-02,
        -1.0115e-02,  2.0937e-02,  1.9770e-02, -2.2146e-02,  3.3436e-02,
        -5.9337e-02,  2.2995e-02,  2.0400e-02,  5.3436e-02,  6.8446e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1384e-02, -1.7598e+01, -1.5250e-02, -2.9420e-01,  5.2261e-02,
         1.2127e-01, -1.0702e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2762e-01,  1.1525e+02, -3.4007e-01, -4.5638e-02,  5.6861e-01,
         2.7132e-01,  1.7662e-01,  1.0136e+00,  6.8644e-02,  3.7645e-01,
        -4.9732e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6745e-02,  3.4884e+00,  5.1422e-02,  7.7584e-03,  3.9470e-02,
        -1.6386e-02,  2.5343e-02, -1.6078e-02,  1.4601e-02, -5.8629e-04,
        -1.3557e-02,  7.3130e-02, -9.4066e-03,  1.3481e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2879e-01, -2.3175e+02,  5.5760e-01,  6.6502e-01, -7.0479e-02,
        -1.8619e-01, -3.1278e-01, -3.2705e-01, -4.1227e-01,  1.6768e-01,
         9.0291e-02, -7.3417e-01,  3.6544e-01,  1.6072e+00,  2.3577e-01,
        -1.1808e-01, -1.3968e-02,  1.9150e-01, -3.3077e-01, -1.2299e+00,
        -1.4033e+00, -5.8872e-01,  7.1552e-02,  2.8028e-01,  1.2962e-01,
        -2.4353e-01,  1.0831e-01,  3.7331e-01, -8.1502e-02, -5.5172e-01,
        -3.7703e-01,  4.1407e-01,  2.3381e-01, -7.2272e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5257e-01,  1.5276e+01, -1.6241e-01,  1.9273e-01, -2.8438e-02,
         1.7252e-02, -1.4978e-02, -8.1360e-03,  1.9531e-04,  9.5806e-03,
        -3.5070e-03,  1.5847e-02,  2.9776e-02,  6.9679e-03,  1.9847e-01,
        -3.5475e-02, -1.6895e-01,  5.1354e-03, -5.0244e-02, -6.3559e-03,
        -3.5926e-02,  1.3117e-02, -1.3291e-02, -2.9097e-02, -1.6955e-02,
        -2.6045e-02, -2.4432e-02,  3.3474e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9774e-02,  1.7851e+01, -1.1422e-01, -1.6161e-02, -3.2019e-02,
         2.9518e-02, -3.7156e-02,  2.5815e-02, -2.4656e-03,  4.3198e-02,
         1.4043e-02, -6.8468e-02,  7.1736e-02,  1.5478e-01,  7.9201e-02,
        -3.6116e-02,  2.1603e-02, -2.8779e-02,  3.8345e-02, -1.0290e-02,
        -5.0767e-02,  3.2754e-02, -4.8054e-02, -1.4743e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.0551e+00, -1.7631e+02,  7.6547e-01, -8.2100e-01, -2.4425e-01,
        -3.4746e-01, -6.1789e-01,  1.6413e-01, -8.1102e-01, -2.9371e-01,
         4.8319e-01,  3.1486e-01, -5.7188e-01, -4.4208e-01, -1.4647e-01,
         4.5992e-02, -1.2950e-01, -2.2807e-01, -1.6449e-01, -7.9778e-01,
         2.7389e-01,  1.0064e-02, -4.7006e-01, -5.9898e-02,  1.8971e-01,
        -4.6112e-01,  3.7824e-02,  1.3169e-01, -1.4066e-01, -1.2666e-01,
        -8.0203e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1680e-01,  4.4709e+01,  3.2621e-01, -1.2795e-01, -1.4291e-02,
        -1.2691e-02, -6.1421e-02, -9.7374e-02,  5.9054e-02,  6.0351e-02,
         2.1739e-02, -6.7532e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9586e+00,  3.4313e+01,  1.3356e-01, -3.3378e-01, -1.3742e-01,
        -1.1601e-01, -1.9278e-01, -1.1345e-01, -3.7567e-02,  1.1232e-01,
         9.5738e-02, -4.1177e-01,  1.0172e-02, -6.9021e-02,  1.5989e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8685e-01, -1.8540e+01, -1.8726e-01,  1.0221e-01,  2.6407e-02,
        -2.1005e-02, -3.8281e-02,  4.1543e-02, -5.5160e-04,  3.8431e-03,
         3.6758e-02, -5.6129e-02,  1.5950e-02, -8.9834e-03, -6.3981e-02,
         1.0322e-02, -3.6490e-02, -1.6841e-02,  5.9264e-02,  2.6787e-03,
         1.3967e-02, -7.9093e-03, -2.0382e-03, -4.9448e-03, -3.5902e-03,
        -5.6378e-02, -1.1693e-02,  1.4362e-02, -1.5470e-02, -1.7663e-02,
         5.1573e-03, -2.5278e-02, -6.7000e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0397e-01,  4.6242e+01, -1.0300e+00, -1.0757e-02, -1.0107e-01,
        -1.0594e-01, -8.9344e-02,  2.4380e-02, -8.4496e-02, -1.3220e-01,
        -1.8585e-01, -1.1452e-02, -1.4883e-01, -7.7243e-02,  1.8691e-01,
         6.1646e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2076e-01,  4.4553e+01,  3.0075e-01, -2.4297e-01,  3.2213e-01,
        -1.7790e-01,  1.2211e-01,  3.7720e-02, -4.2303e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0614,  7.9877, -0.0514,  0.1120,  0.1430,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1043e+00, -2.2598e+02, -2.0371e-01,  3.5263e-01, -4.4679e-01,
         1.1789e+00,  3.2730e-01,  3.4965e-01, -6.3727e-01, -4.2742e-01,
        -3.6204e-01,  4.1060e-01,  5.8079e-01,  1.0539e-02, -4.4834e-01,
        -3.6446e-01,  1.8962e-01,  1.3287e-02, -5.4240e-01,  2.1023e-01,
        -3.3608e-02, -4.4688e-01,  1.0588e-01, -6.6333e-01,  1.0305e-01,
        -3.7177e-01,  1.0517e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  3.8706, -78.0719,   1.0400,  -0.3715,  -0.3662,  -0.3755,  -0.3734,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6237e+00, -1.5389e+02, -3.0203e-01, -2.8861e-01,  1.3585e-02,
         1.3311e-01,  7.4991e-02,  7.2297e-02,  3.8889e-01, -1.4392e-01,
        -1.4648e-01,  3.8790e-01,  2.7610e-01, -4.1933e-01, -4.1301e-02,
         1.8728e-01,  5.0022e-02, -1.9060e-01,  5.2200e-02,  1.6838e-01,
         4.0031e-01, -3.7912e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2204e-02,  1.0491e+01,  1.0434e-01,  1.9483e-02, -3.4440e-02,
         3.5749e-02, -2.6417e-02, -1.6759e-02, -3.0526e-02, -4.0322e-02,
        -6.3659e-02,  7.1395e-03, -4.4829e-02, -5.9298e-02,  3.6515e-03,
        -3.1224e-02, -6.1661e-03, -1.3442e-02,  4.6404e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2950e+00,  1.3200e+02, -8.7652e-01, -1.3973e-01, -5.3813e-01,
         1.5665e-01,  1.3116e-01, -1.1883e+00,  4.3964e-01,  3.1587e-01,
        -2.5860e-01, -3.5438e-02,  4.9386e-02,  1.8264e-01,  3.6953e-01,
        -2.1492e-01, -4.0640e-01, -9.7939e-02,  2.0799e-01,  1.1609e-01,
         2.8969e-01,  5.1797e-02,  3.0875e-01, -1.7439e-01,  2.0127e-01,
        -2.6994e-02, -2.7876e-01, -9.1520e-02, -3.0458e-01,  5.6818e-02,
        -5.6849e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-5.2862e+00,  1.7695e+02, -1.4843e+00, -9.1083e-01, -4.9209e-01,
         1.0615e-01,  2.3712e-01,  1.2142e-01, -4.7545e-01, -5.0222e-02,
        -5.5138e-02, -1.4615e-01,  8.4266e-02,  2.2721e+00,  1.2898e-01,
         3.5913e-01,  1.8427e+00,  4.5510e-01,  1.0059e+00,  1.3137e-01,
         1.5380e-01, -2.1465e-01,  6.7405e-01,  1.0815e-01,  3.2719e-01,
         2.4767e-02, -1.2052e-01, -9.7062e-01, -1.5581e-01,  1.5903e-01,
        -2.3156e+00, -4.0024e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5869e-01, -7.8807e+01, -2.2446e-02,  2.3041e-01, -2.9369e-01,
        -1.6472e-01,  7.2095e-02, -3.3552e-01, -3.0362e-01,  2.0884e-02,
         7.0383e-02, -2.6360e-01, -1.0392e-01, -3.9289e-01, -4.3251e-02,
        -1.0900e-01, -3.1100e-01, -7.5594e-02,  2.4627e-01,  1.8876e-02,
         3.9506e-02, -6.4623e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6205e-01,  5.2598e+01, -1.6781e-01, -2.9370e-01, -5.2561e-02,
         2.1629e-01, -4.3828e-02, -1.2914e-01, -3.4810e-02,  5.8266e-02,
         7.7334e-02, -4.6124e-02, -1.3137e-02, -7.2325e-02, -4.8682e-02,
         9.4768e-02, -6.9665e-02,  2.6080e-01, -1.3710e-01, -1.5135e-01,
        -1.6818e-01, -2.3789e-01,  2.8564e-02, -1.2439e-01, -1.4374e-02,
        -1.4129e-02, -1.8141e-02, -2.3395e-01, -2.4302e-01,  1.7880e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5554e-02,  1.3101e+00, -4.0185e-03,  1.7091e-04, -6.9607e-04,
         3.6778e-03, -8.5654e-03,  3.2426e-04, -1.5163e-03, -2.1836e-03,
         3.2331e-03,  5.3416e-03,  2.4732e-03,  3.6578e-03, -4.0297e-03,
        -2.5804e-03, -1.1443e-03,  6.2711e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7803e-02,  6.3701e+00,  2.1188e-01, -2.8294e-03, -2.3404e-02,
        -1.3547e-02,  7.9431e-03,  1.2848e-02,  3.5335e-03,  1.3338e-03,
         1.2337e-02,  5.0822e-03,  1.4074e-02,  6.8998e-03,  4.0428e-03,
        -1.0926e-02, -4.0737e-03, -1.4370e-02, -6.4483e-03, -8.4224e-03,
        -2.9381e-03, -2.9888e-02,  1.2345e-02,  1.2664e-02,  6.2209e-03,
         1.0978e-02, -4.6477e-02,  1.8016e-03,  1.4038e-02, -9.2544e-04,
         2.2334e-03,  4.0684e-03,  4.4017e-02,  2.1624e-02,  4.5391e-03,
         2.3955e-02,  2.5967e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8101e-01,  6.8126e+01,  5.2307e-01, -1.7470e-01, -1.6567e-01,
        -1.0746e-01,  4.0261e-03,  1.2844e-02,  1.2256e-02, -7.9297e-02,
         1.1111e-01, -2.7714e-01,  7.0681e-02,  1.8554e-02,  2.5063e-01,
        -5.1345e-02, -1.1401e-01,  3.0054e-02,  7.5019e-02,  7.9134e-02,
        -1.0558e-01, -1.1784e-01,  1.3164e-01, -1.7797e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5744e-02,  2.6896e+01, -2.0564e-01, -7.2349e-02, -2.5645e-02,
         6.1024e-02, -3.0381e-02, -3.0654e-02,  3.9169e-02, -2.4192e-02,
        -1.0373e-02, -3.1855e-02,  6.2475e-02, -4.0979e-02, -1.7924e-03,
        -2.6992e-02, -7.6753e-03, -7.8770e-03,  2.1254e-02,  1.4486e-03,
         2.2227e-02, -4.1069e-02,  2.3473e-03,  2.3187e-02, -4.4461e-02,
        -1.8399e-02, -6.3056e-03, -4.1831e-03, -5.5745e-03, -2.0464e-02,
        -1.2685e-01,  1.9803e-02,  1.8700e-02, -2.8253e-03,  3.3148e-02,
        -1.5102e-02,  9.8644e-03, -3.8593e-03,  2.9815e-02, -9.8461e-03,
         6.9922e-03,  1.4842e-02,  2.3282e-02,  1.0728e-01, -6.0554e-03,
         2.7112e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6471e-02,  7.1841e+00, -1.5858e-01, -3.1372e-02, -1.2250e-01,
        -2.1368e-02, -6.6267e-03, -1.6071e-02,  7.3285e-03,  1.7498e-02,
        -1.0148e-02, -2.0609e-02, -3.0817e-02, -1.1073e-02,  3.2757e-02,
         1.0185e-02, -1.9200e-02,  3.1486e-02,  2.5417e-02,  9.9366e-03,
         1.7176e-02,  2.6734e-02,  2.1087e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0431e+00,  9.7053e+01, -8.4696e-01, -4.7561e-02,  1.7741e+00,
         8.6398e-01,  6.8625e-03,  8.2022e-01,  8.9225e-01, -3.2827e-01,
         8.7389e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1267e+00,  9.0821e+01, -2.6136e-01, -4.1028e-01,  3.9231e-01,
        -2.8488e-01,  1.3343e-01,  3.9678e-01, -1.2326e-01, -1.8632e-01,
         1.9620e-01, -5.4108e-02,  1.6736e-01, -1.0790e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1744e-01,  9.4454e+00,  6.6749e-03,  1.1572e-02, -2.3268e-02,
        -4.7578e-02, -2.1392e-02, -1.3452e-02, -2.5072e-02, -1.1736e-01,
         1.6131e-01,  1.7619e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5618e-02,  1.5206e+01,  1.1164e-03,  1.7887e-01,  1.1550e-02,
         3.7875e-02, -2.2635e-02, -2.4391e-02, -2.0706e-02,  2.0497e-02,
         1.4925e-03, -1.8280e-02,  5.8704e-03,  4.3632e-03, -1.0778e-03,
         1.2380e-02, -1.4330e-04,  2.2573e-03,  9.4659e-03, -3.6586e-02,
        -6.1327e-03,  4.9414e-03,  2.5216e-02, -1.9040e-02,  3.5695e-02,
        -1.8152e-02,  1.3112e-01,  5.2139e-03, -2.7119e-03,  1.1566e-02,
        -6.5720e-04,  2.6260e-02,  1.6212e-02, -1.2541e-02,  1.2964e-02,
        -1.1701e-02, -3.9976e-03,  1.4699e-02,  2.2581e-02,  1.1974e-03,
        -3.8676e-03, -1.3695e-02, -3.2406e-03,  7.2803e-03,  3.4884e-03,
        -4.4575e-03,  1.6002e-03, -1.7633e-02, -9.5716e-03, -2.3156e-02,
         9.5088e-03, -4.0298e-03, -1.0246e-02, -4.3540e-02,  4.2421e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.4200e+00, -2.6227e+02,  3.9571e-01,  3.7572e-01, -6.9484e-01,
        -1.2476e+00,  6.0319e-02, -8.4129e-01, -1.6311e+00,  7.5037e-01,
         4.0719e-01, -7.8814e-01,  6.3024e-01,  1.2406e-01, -5.2152e-01,
        -2.8825e-01,  4.6026e-01, -1.2876e+00, -5.2688e-01,  1.4343e+00,
         6.1270e-03, -3.0551e-01, -6.2923e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0583e-01,  3.2804e+01,  5.4136e-01, -1.3378e-01,  1.1606e-01,
         7.4598e-02,  1.3017e-01, -7.2426e-02, -4.2906e-02,  3.6294e-02,
        -4.9351e-02, -5.0899e-02, -9.3737e-03,  2.1559e-01,  2.0275e-02,
        -2.8089e-02, -6.7643e-02,  9.5458e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7339e-05,  4.0752e-01, -3.0640e-03, -2.0299e-03, -1.0722e-03,
        -4.1541e-04,  1.0596e-03, -2.1793e-03, -1.5562e-04, -2.9996e-04,
         2.0072e-03, -3.8447e-04,  1.2785e-03, -1.1090e-03,  7.7445e-05,
         2.4567e-03, -5.8023e-04, -4.7618e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7872e-02,  2.0226e+01, -3.2212e-01,  5.5744e-02, -1.2761e-02,
         8.1105e-02, -7.4464e-02, -1.1212e-03,  7.7177e-02,  5.4205e-02,
        -2.3595e-02,  1.2623e-01,  1.1030e-01,  9.7367e-02, -9.6903e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.1256e-03,  5.4847e-01, -4.7475e-03,  5.6585e-03,  3.4701e-04,
        -2.9480e-04, -3.1462e-03, -1.6786e-03, -1.7537e-03, -1.5064e-03,
         5.1684e-04, -7.5972e-04, -2.5609e-04,  4.7570e-04, -1.4119e-03,
         1.1711e-03, -1.7831e-03, -1.3560e-03, -3.4405e-04,  3.7764e-03,
         1.7727e-03,  1.1156e-03,  2.9216e-05, -2.0518e-04,  4.2787e-03,
         1.0436e-03,  6.4696e-04, -3.3941e-06, -6.7923e-03,  1.0570e-03,
        -1.2865e-03, -5.2710e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8628e-01,  1.1586e+02,  1.7274e-01,  1.5538e-01,  3.6274e-01,
        -4.3722e-03,  3.6579e-01,  8.8779e-02, -4.4684e-01,  2.7812e-01,
         4.0581e-02, -3.4277e-01, -1.2385e-01, -5.6060e-02, -1.3773e-01,
        -1.4271e-01, -3.5341e-01,  2.6128e-01,  1.7903e-01,  1.2600e-01,
         3.6294e-01,  8.6694e-02, -2.3287e-01, -8.1774e-02, -9.5312e-02,
        -1.6278e-01,  4.8783e-02,  1.0209e+00,  1.4137e-01, -1.8729e-01,
        -1.0441e-01,  3.9869e-01,  1.7474e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0199e+00,  1.4493e+02,  5.5293e-01,  1.0028e+00,  8.1842e-01,
         7.4595e-04,  5.1925e-01, -8.6231e-03,  7.3052e-01,  4.8105e-01,
         2.7632e-01,  4.3335e-01,  4.6551e-01,  4.1081e-01,  2.3690e-01,
         1.2178e-01,  4.0824e-01, -2.5567e-03, -2.5219e-01, -4.7425e-03,
         4.3997e-02, -1.5705e-01,  7.4251e-02, -1.6148e-01, -2.6923e-02,
         1.1113e-01, -3.6048e-02, -1.0748e-01, -2.3544e-02, -1.5887e-01,
         9.7565e-02,  8.1884e-02,  3.5084e-01, -1.6541e-02,  5.3123e-02,
         2.2594e-02,  3.6871e-02, -8.4507e-02,  1.4316e-01,  1.3447e-01,
         1.9495e-02, -1.1372e-01, -2.0650e-02,  6.1867e-02,  8.0936e-02,
         8.0426e-02,  2.2929e-02,  9.1331e-02, -1.0666e-02, -2.8864e-01,
        -1.8711e-01, -7.0963e-03, -1.0521e-01,  6.3394e-03,  5.9841e-02,
         9.0418e-03,  3.9122e-02, -7.4725e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6604e-02, -3.4603e+01, -6.0143e-02,  2.2657e-01,  1.2471e-01,
        -2.4729e-02,  7.3541e-03,  2.1545e-02,  4.6616e-02,  6.0023e-02,
        -4.6264e-02,  1.2074e-02,  2.0685e-02, -1.7163e-01, -2.6563e-01,
         5.9235e-02,  1.6211e-02,  2.3471e-02,  4.7922e-02,  1.2574e-02,
         7.1896e-02, -4.9489e-03,  7.1550e-02,  5.4096e-02,  2.7283e-02,
        -9.8289e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8453e-01,  2.5458e+01, -5.1148e-01, -3.5718e-02, -1.1443e-01,
         1.7850e-01, -9.4732e-02,  9.0547e-02, -1.1280e-01, -2.3538e-03,
         4.5669e-03, -2.9691e-02,  1.3317e-01,  9.8515e-02, -9.0660e-02,
        -5.7559e-02, -3.2948e-02, -1.3136e-02, -2.8270e-02, -9.8804e-02,
         3.6710e-02, -1.7212e-01, -4.9387e-02, -2.5760e-02, -7.8458e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6489e-01, -1.0346e+02, -2.0656e+00,  2.5355e-01, -4.3965e-03,
        -1.8563e-01,  1.5892e-01,  7.3264e-02,  6.3167e-02,  5.0100e-02,
        -8.2351e-02,  4.4313e-02,  3.2862e-01, -3.2660e-01, -1.2018e-01,
         7.7269e-02,  3.4069e-02, -1.3414e-02,  3.9731e-02,  6.0218e-02,
         4.5598e-02,  1.2240e-01, -3.2598e-02,  1.2122e-02, -1.0729e-01,
         1.4634e-01, -1.1435e-01,  2.7333e-02,  5.1879e-02, -8.4963e-02,
        -9.3339e-02,  1.2225e-01,  7.9006e-01,  4.5552e-02,  1.7773e-01,
         2.9221e-01, -3.3841e-02,  6.4067e-02,  5.1857e-02, -2.2676e-01,
        -1.3621e-02,  1.2044e-01, -4.0959e-02, -2.1828e-01, -7.2840e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3084e-01,  2.3549e+01,  2.1576e-01, -2.4631e-01, -4.2895e-02,
        -1.7703e-02, -8.8972e-02, -4.1641e-01,  9.2093e-02, -1.0328e-02,
        -1.0164e-01, -3.7877e-02,  7.9165e-03,  8.3118e-02,  6.3782e-02,
         1.2043e-01,  1.1912e-01, -9.5910e-02, -2.7280e-03,  5.5960e-02,
        -1.1456e-02, -4.4075e-02,  7.3416e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5395, 11.1024,  0.0731,  0.0188, -0.0242,  0.0208, -0.0920,  0.0470,
         0.0582,  0.0569,  0.1079, -0.0635,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-7.0696e-03,  2.6297e+00, -4.6489e-03, -6.7701e-04, -1.3431e-02,
         1.7450e-03,  3.7100e-02,  1.3085e-02,  2.9941e-03,  1.1570e-03,
         9.5254e-03, -4.5661e-04,  1.1477e-03, -5.0286e-03, -1.5660e-02,
         5.3086e-03, -6.2050e-03, -2.4291e-03,  4.9082e-03, -5.2400e-03,
        -5.8244e-04, -2.3871e-03,  1.3128e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3792e-01, -1.6845e+02, -8.7753e-01,  3.1499e-01,  2.3651e-01,
         4.8295e-01, -7.4125e-01, -2.5572e-01, -3.6113e-01, -3.2805e-01,
        -1.7920e-02,  3.5481e-01, -2.1177e-01, -2.0864e-01, -2.2454e-01,
        -2.1680e-01, -2.0977e-01,  4.7940e-02, -1.2028e-01, -3.1152e-01,
        -1.3147e-01, -2.4489e-01, -1.1640e-01, -1.9392e-01, -2.1282e-01,
        -3.0900e-01,  1.1518e-01, -1.4674e-01, -4.4323e-02, -2.6972e-01,
        -5.8150e-01,  9.9226e-02,  1.3866e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3017e-02,  4.9457e+00,  4.3100e-02, -1.4499e-02,  2.0747e-02,
        -9.5224e-03, -1.4089e-02,  1.0947e-02,  3.6734e-03,  3.2797e-03,
         2.4701e-02,  4.2236e-03, -1.1975e-02, -3.8053e-03,  3.4134e-03,
        -6.4239e-03,  4.9588e-03, -1.1668e-02, -2.1827e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0763e-03,  1.6125e+00, -9.7178e-04, -1.1987e-02, -7.0422e-03,
         4.7248e-03,  5.4665e-03, -3.5277e-03,  1.7606e-03,  2.2794e-03,
        -1.5704e-02, -8.0101e-03, -3.0795e-03, -9.9609e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3521e+00, -2.0053e+02, -8.5687e-01,  5.5072e-01, -6.0546e-01,
        -8.9940e-01, -2.3282e-01, -7.1077e-01, -5.8362e-02, -3.9941e-02,
        -9.2562e-02, -5.0680e-01, -9.4685e-02, -5.9041e-01, -2.2573e-01,
        -1.5091e-01, -5.1516e-01, -1.6139e-01,  4.3088e-01,  2.1905e-01,
         2.8611e-01, -3.4361e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5920e-01,  3.2072e+01, -1.5697e-01, -2.9107e-01,  1.4064e-02,
        -3.9771e-02, -8.0465e-02, -1.2135e-01, -3.3392e-02, -2.7938e-02,
        -1.7159e-02, -6.0480e-02, -4.6328e-02,  1.1353e-02, -2.9890e-02,
        -4.6991e-02, -2.3017e-02,  2.9557e-02, -2.2440e-02, -3.8701e-04,
        -4.6762e-02, -1.1030e-03, -1.9929e-02,  1.9392e-02,  1.8374e-02,
        -4.4093e-03,  1.3965e-01, -1.6233e-02, -3.0346e-02,  1.3313e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1076e-01,  2.9440e+01, -1.7335e-01,  1.1061e-02, -6.0283e-02,
         8.1691e-02, -1.9437e-01, -8.7648e-02,  4.2152e-02, -9.4871e-02,
        -2.0922e-01,  9.0363e-03,  1.6250e-02,  2.2539e-02, -7.8327e-03,
        -3.5032e-01, -1.0848e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6785e-01,  1.2896e+01,  2.3464e-02, -8.8840e-02, -2.7654e-03,
        -5.9474e-03, -2.7994e-02,  1.0847e-01, -4.1477e-02, -1.0432e-02,
         1.6608e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4735e+00, -9.6660e+01, -7.9697e-01, -2.2217e-01,  2.7744e-01,
         3.4958e-02,  8.4453e-01,  3.0730e-01,  1.7098e-01,  6.1764e-02,
         3.5077e-01, -2.0102e-01,  4.1448e-01,  2.8107e-01,  2.9974e-02,
         4.8923e-01, -7.2980e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7537e-02,  5.4535e+00, -2.4013e-02,  2.3426e-02,  1.0170e-02,
        -9.8852e-04,  3.9205e-03, -9.9028e-04, -2.6399e-04, -7.2021e-03,
         2.4535e-03,  1.3276e-03, -1.5861e-03, -2.0310e-03,  2.7874e-02,
        -4.4185e-03,  1.1898e-03, -8.8871e-03, -9.9277e-03,  4.4777e-03,
        -8.1918e-03, -5.6629e-03,  5.2218e-03,  1.1154e-03, -1.6215e-02,
        -1.7981e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8327e+00,  1.0107e+02,  1.5055e+00, -9.7962e-02, -1.0980e-01,
         7.4833e-01, -4.4352e-01,  5.6104e-01, -3.1890e-01,  1.0618e-01,
        -1.4854e-01, -9.3026e-01,  1.2664e-01, -4.6958e-01,  9.3352e-02,
         5.8008e-02, -8.4907e-02, -5.0108e-01,  1.8614e-01, -1.3197e-01,
        -8.5544e-02,  1.9625e-01, -1.2375e-01, -2.7897e-01, -2.6486e-01,
         2.3915e-02,  1.3598e-01, -2.4128e-01,  3.2215e-01, -2.9016e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9435e-01,  3.9628e+01,  9.7511e-02,  4.8340e-02,  7.8936e-02,
        -1.6853e-01,  7.5490e-02,  4.9276e-02,  1.9473e-02,  4.9183e-02,
         7.2058e-02,  6.6436e-02,  7.4470e-02,  2.2042e-01,  6.2074e-02,
         1.2757e-01, -1.2748e-01, -1.0538e-01,  6.6041e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-6.1820e-01, -1.5170e+02, -1.1505e+00, -2.2665e+00, -2.5286e-01,
         7.7866e-01, -1.0680e+00,  5.0774e-02, -7.0486e-02, -1.6221e-01,
         8.5622e-04, -1.0489e-01, -2.7928e-02, -2.4689e-01,  5.4705e-01,
         2.2848e-01, -2.6960e-01, -3.2740e-01, -2.4697e-01, -9.0601e-01,
        -1.7632e-01,  3.1480e-01,  4.3516e-02,  7.5272e-02, -1.4310e-01,
        -6.8630e-02, -6.0808e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8021e-01,  2.3084e+01, -9.2717e-02, -1.1660e-01,  4.1538e-02,
        -3.5435e-02,  1.6928e-02,  5.7625e-03,  1.0869e-01, -8.3476e-02,
         4.7425e-03,  3.4564e-02, -8.8220e-02, -1.6800e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6673e-02,  1.6390e+01, -8.9095e-02, -6.1486e-02, -3.4134e-02,
        -1.5311e-01, -3.1188e-03,  6.8372e-02, -7.4608e-02, -7.4423e-03,
         2.8470e-02, -3.3868e-02, -8.6584e-03, -5.6119e-02, -3.3910e-02,
        -1.8642e-02, -7.0688e-02, -2.8977e-02,  7.1530e-04,  4.2715e-02,
        -7.6781e-03, -6.5413e-02, -2.0632e-02, -2.0671e-02,  5.0513e-02,
        -3.1825e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5171e-02,  9.4145e+00, -1.1199e-01, -4.9607e-02, -2.7910e-02,
        -9.8347e-02,  3.3181e-02, -1.6223e-03,  2.5326e-03,  1.3606e-02,
         3.3220e-02,  2.0017e-02, -2.0230e-02,  3.0503e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0559e-02,  6.5239e+00, -7.2259e-03,  3.5996e-02, -1.4626e-02,
        -1.1271e-02,  6.1456e-03, -2.1380e-03, -7.1845e-03,  2.7447e-03,
         6.7289e-04,  9.4170e-04,  1.0951e-02, -3.1116e-04,  1.7967e-02,
         9.5027e-03, -4.0275e-03,  1.4492e-03,  1.9868e-04,  3.1312e-03,
         8.2806e-03, -1.4999e-03, -2.8936e-03, -6.8199e-03,  2.6859e-03,
        -4.2027e-03, -7.1868e-03,  5.8383e-03, -5.4534e-03,  3.4109e-03,
         3.5709e-03, -9.5787e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5018e-02,  3.9221e+00, -2.1682e-02,  2.2534e-02, -3.2446e-02,
         2.2639e-03, -2.5209e-02, -4.5914e-03, -9.5519e-03, -2.4022e-03,
        -8.2788e-03,  1.9169e-04,  8.7462e-03, -4.6004e-03,  1.0089e-02,
         3.0778e-03,  2.7477e-03,  7.3609e-03,  5.3127e-03,  8.8782e-03,
         4.5784e-03,  4.9775e-03,  3.8477e-03,  6.0158e-03, -3.8993e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6295, 85.5366,  2.5587,  0.4325,  0.5260, -0.4498,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5452e-02,  1.7261e+01,  1.1057e-01, -9.0813e-02,  4.8772e-02,
        -2.3158e-02,  1.4264e-02,  1.9733e-02, -2.1336e-03, -6.8260e-03,
         1.4214e-02,  1.7613e-02,  3.6007e-03,  1.2824e-01,  3.3622e-02,
         5.9825e-03,  3.7445e-02,  3.6761e-02,  3.2181e-02, -3.2077e-03,
        -4.6161e-02,  9.3896e-03,  3.4212e-03,  1.3053e-02,  1.4294e-02,
         7.4766e-02,  4.7271e-03, -5.2719e-04,  1.5568e-02,  1.9936e-02,
         3.5870e-02, -6.0842e-02, -2.3976e-02, -1.4029e-03,  1.4520e-02,
        -1.7017e-03, -4.5160e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3498e-01, -3.8257e+01,  1.5878e-01, -2.6163e-01,  1.0109e-01,
         1.1767e-01,  3.8180e-01, -3.4376e-02,  5.0806e-02,  2.6158e-02,
         5.3543e-02, -7.4047e-03, -1.2210e-02,  8.3193e-02,  1.9140e-02,
         7.2020e-02,  1.0061e-01, -7.6282e-03,  5.7974e-02, -6.3995e-02,
         4.0092e-01,  6.7214e-02,  2.1423e-01,  1.4898e-02,  3.8835e-02,
         1.6356e-02,  4.2311e-04,  2.1481e-02,  1.1691e-01, -5.7969e-02,
         8.4383e-02,  7.9187e-02,  2.7490e-02,  5.8467e-02,  9.2067e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2483e-01, -2.2948e+01,  6.4584e-01, -4.1500e-02,  3.9817e-03,
         1.0764e-02, -2.9378e-02,  5.2884e-02,  7.6975e-02, -1.2867e-01,
         9.9459e-02,  4.0161e-01, -1.3625e-01,  3.3563e-02, -3.2513e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3183e+00, -2.6569e+02,  1.0055e+00,  5.5419e-01, -1.0225e+00,
         3.4756e-01, -8.8015e-01, -1.0366e+00, -1.5806e-01, -9.0951e-01,
         4.1613e-01, -1.2062e-01,  2.1304e-02,  2.9475e-01, -3.3009e-01,
        -2.8363e-01, -5.7314e-01,  4.7906e-01, -6.5191e-01,  8.4605e-02,
         2.3822e-01, -1.8852e-01, -8.2996e-02, -3.0945e-01, -1.2809e-01,
        -4.0711e-02, -1.6553e-01, -1.8923e-01, -1.7369e-01, -2.0654e-01,
        -2.5813e-01,  1.6685e-01,  1.7948e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1584e-02, -5.6389e+00, -2.9519e-03,  2.8521e-02,  2.3070e-02,
         8.8756e-03,  5.3364e-03, -4.2367e-05,  1.3785e-02,  9.9796e-03,
         6.0966e-03,  1.1578e-02, -3.3562e-03,  2.2997e-02, -3.8517e-03,
         3.1330e-03,  7.9948e-03, -2.4127e-03,  4.2421e-02,  1.0628e-02,
         4.5340e-03,  1.0670e-02, -6.5877e-03, -7.9593e-05, -4.7041e-03,
        -1.1957e-03,  9.8674e-04,  7.2387e-02,  4.3147e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.0870e-02,  2.1240e+00, -3.9413e-03, -2.0433e-03,  3.0065e-03,
         1.4499e-03,  1.6182e-03, -5.0261e-03, -3.8796e-03,  3.4374e-03,
         4.2407e-03,  4.7930e-03,  3.6613e-03,  5.3741e-04, -1.4119e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1100e-01,  3.1958e+01, -1.3864e-01, -8.4734e-02, -2.1274e-02,
        -6.3616e-02,  2.6313e-03, -2.9059e-02,  4.3933e-02, -5.1974e-02,
         4.8568e-04, -3.2007e-02, -1.0393e-02,  1.0115e-02,  2.1731e-03,
        -5.0988e-02, -2.0628e-03, -2.5726e-03, -2.2044e-02, -3.3004e-02,
        -3.0385e-02, -3.2226e-02, -2.0844e-02,  5.4658e-02,  2.6342e-02,
        -3.2406e-02,  2.0921e-02,  2.1110e-02, -7.8259e-03, -5.3199e-03,
        -8.3266e-04, -2.0103e-02, -3.6921e-02,  1.2686e-02,  1.3159e-02,
         8.7945e-02,  4.7337e-02, -1.7772e-02,  1.5433e-02, -1.7566e-02,
        -1.2178e-04,  5.4590e-02, -6.1126e-02, -4.2145e-02,  1.2194e-02,
        -5.6228e-02,  3.3542e-02, -1.4617e-02, -1.3256e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2905e-01, -1.9327e+01, -2.2433e-01, -4.4006e-02, -1.2705e-03,
        -3.6586e-02,  1.3145e-02, -2.3857e-02, -8.8738e-03, -1.3753e-01,
        -8.5981e-03, -3.8430e-02,  1.0043e-02,  2.5136e-03,  1.7425e-02,
        -1.1405e-02, -6.4151e-02, -1.9035e-02, -5.3726e-03, -2.7290e-02,
        -3.2513e-02, -3.3427e-02,  1.5066e-02, -1.9058e-02, -1.5448e-02,
        -4.7076e-02,  2.5681e-02,  6.5872e-03, -4.3711e-02,  4.4114e-02,
        -5.7121e-03,  3.9746e-03,  5.2847e-02, -1.1739e-01,  1.0903e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3886e-02,  1.0004e+01,  1.0991e-01,  2.8942e-02, -5.7894e-03,
         2.7308e-02, -6.0493e-02,  6.1159e-02,  8.4162e-03,  1.0966e-02,
        -3.3386e-02, -1.2358e-02, -1.1196e-02,  3.4824e-03,  3.0924e-02,
        -4.5841e-02, -1.0760e-02,  2.5740e-03, -5.4239e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1562e+00,  1.2479e+02, -1.0142e+00,  5.3032e-01, -5.4262e-01,
        -1.0656e-02, -8.5811e-01, -1.1327e-01,  1.2317e-02, -5.6243e-01,
        -5.4052e-01, -2.9943e-01, -4.3201e-01, -9.8830e-01,  2.3923e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1009e-01,  1.0840e+01, -5.4732e-05, -3.7857e-03,  6.6632e-03,
        -6.2301e-03,  1.9902e-02, -1.9630e-04,  3.1808e-02,  2.7234e-02,
        -1.7193e-02,  2.0147e-03,  7.5646e-03, -8.0350e-03, -3.0080e-02,
         4.8301e-03, -5.4599e-03, -1.2506e-02, -5.1064e-02,  2.5951e-02,
         7.7337e-03, -1.1078e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7313e-02,  2.8805e+00,  9.1657e-03, -1.4352e-02, -3.3307e-03,
        -2.1965e-02, -1.3723e-02, -1.2803e-03, -6.7827e-03, -4.0937e-03,
        -3.3962e-03,  6.1037e-03, -1.6020e-02, -3.0701e-03,  3.5092e-04,
         2.4075e-03, -2.2540e-03,  1.9844e-03, -6.6237e-03, -4.0433e-03,
        -3.4985e-04, -2.4713e-03, -3.6390e-03,  4.1402e-03,  6.7594e-03,
         4.6739e-03, -3.9004e-03, -3.1060e-03,  4.3088e-04,  1.6757e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7531e-01,  1.3542e+01, -6.9139e-02, -1.6669e-01, -1.1265e-01,
         8.4747e-02,  2.8563e-03, -4.8965e-02, -7.4348e-02, -5.7426e-02,
        -1.5287e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8571e-03,  9.0783e-01,  1.9966e-03,  3.8534e-04, -1.0222e-03,
         7.9080e-04,  4.2939e-03,  2.1838e-04,  2.7665e-03, -1.4614e-03,
        -8.0682e-04,  1.8167e-03,  1.5676e-04,  1.2180e-03,  4.0645e-04,
         2.4419e-03, -2.0423e-03,  1.0722e-03, -6.8579e-04,  6.1378e-04,
         3.8498e-03, -1.3978e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9356e-01,  1.2273e+02,  3.4933e-01, -4.3250e-01, -6.3974e-01,
         4.8207e-01,  2.5315e-01,  5.3443e-01, -2.5513e-01,  5.2467e-02,
        -8.2194e-01,  3.5591e-01,  2.3361e-01, -4.6390e-01,  2.7339e-02,
         4.6478e-02,  1.9852e-01, -8.9475e-02, -3.0235e-01,  1.1408e-01,
         1.1705e-01, -1.3228e-01, -2.7502e-01, -4.5369e-02,  1.5106e-01,
         5.2430e-02, -1.5739e-01, -9.5130e-02, -9.8912e-01, -5.0481e-02,
         7.4534e-01, -9.2472e-02,  8.9194e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7097e-02, -1.9796e+01, -4.9310e-01, -6.3761e-02, -1.5126e-01,
        -9.4773e-02, -5.4079e-02,  2.2996e-01,  1.7993e-02,  1.8360e-02,
        -3.1282e-03, -2.8952e-02,  8.8257e-03,  1.7594e-02, -3.4800e-02,
         1.9675e-02, -5.8577e-02,  5.5692e-02, -3.9194e-02,  3.0402e-02,
        -5.8810e-02,  1.0095e-01,  6.5462e-03, -4.9926e-03, -1.1491e-01,
        -2.8550e-02,  8.4872e-02,  6.1548e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0173e-01,  2.1740e+01,  1.4233e-01, -2.4849e-02, -1.5510e-02,
        -1.4474e-01,  7.1657e-02,  7.3473e-02, -1.2168e-01,  1.0990e-01,
         1.8369e-03, -1.4811e-03,  1.0571e-02,  4.1657e-02, -6.9960e-03,
         4.5922e-03, -2.5050e-02, -5.1764e-02, -2.4435e-02, -6.4938e-02,
        -2.2473e-04,  4.0789e-03,  3.5298e-02, -4.0189e-02,  3.2007e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.8908e-01,  7.9287e+00, -4.6475e-03, -4.1386e-03, -3.2175e-02,
         1.9180e-02, -1.3291e-02,  4.8452e-03, -2.7856e-03, -9.7096e-03,
         1.3054e-02, -1.3806e-02, -6.4578e-03,  1.4315e-02, -5.0748e-03,
        -4.4574e-02,  1.6894e-03, -1.8953e-02, -1.2058e-02,  1.5152e-02,
         1.5175e-02,  3.6984e-03,  3.4006e-03, -7.4422e-03,  1.7444e-02,
        -2.8566e-04, -3.6384e-03, -1.1547e-02, -9.6450e-03, -1.4997e-02,
         7.9281e-04,  1.3154e-04, -2.5181e-02,  7.6821e-03, -8.0180e-03,
         9.4275e-04, -8.8029e-03, -3.8435e-03,  1.6830e-02, -7.0082e-03,
         8.3462e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2570e-02,  3.2702e+01,  6.3061e-02,  5.2488e-03,  1.1308e-01,
        -2.2219e-02, -7.1650e-02,  5.7818e-02,  7.5945e-02, -7.9953e-03,
         7.5347e-02,  4.1231e-02, -2.4693e-02, -3.2352e-02,  2.6968e-01,
        -1.1123e-01,  6.1623e-02,  2.6764e-02, -5.2162e-02,  1.2206e-03,
         3.5895e-02,  8.0854e-02,  1.3003e-02, -2.0776e-02, -1.1512e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1565e+00,  1.2257e+02,  8.2437e-01,  3.3210e-02,  5.7544e-01,
         3.1509e-02,  7.6415e-02,  5.0134e-01,  1.6981e-01,  3.3616e-01,
        -1.5484e-01,  4.3497e-01,  2.2988e-01,  9.0142e-03,  3.3246e-01,
        -1.9330e-01,  2.9807e-01,  6.2290e-02, -2.0894e-01,  5.0158e-01,
        -6.1170e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1916e-03,  4.1042e+00,  2.7757e-02, -3.0338e-02,  1.9312e-03,
         8.5061e-03, -7.2603e-04, -9.1752e-03, -8.0087e-03, -1.8173e-02,
         1.5485e-02, -2.5099e-02,  9.8617e-03, -1.9887e-02, -9.2069e-03,
         6.9020e-03,  2.4433e-03, -4.0175e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3776e-02,  4.5048e+01,  3.0949e-01,  1.1667e-01,  6.5091e-02,
         1.3112e-01,  7.0754e-02, -7.9062e-02,  1.4236e-01,  1.1364e-01,
         3.7776e-02,  3.7578e-02, -1.7001e-01,  1.1480e-01,  2.6126e-01,
         1.5082e-01,  1.2005e-01,  5.2133e-02, -6.1880e-02,  2.6905e-02,
        -8.9854e-02,  4.5690e-02,  1.0356e-03, -1.1454e-02,  6.0579e-03,
         1.4385e-02, -3.7704e-02,  8.8858e-02, -2.3921e-02,  3.4091e-03,
        -3.0452e-02,  6.1494e-02,  1.6976e-02, -2.7813e-01,  6.9328e-02,
         1.2625e-01,  3.7390e-01, -2.1855e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9548e-02,  1.1190e+01, -1.6542e-01, -3.6240e-02,  1.1595e-03,
         4.7007e-02, -3.9418e-02,  2.1805e-02,  1.8745e-02,  1.3868e-02,
         2.8399e-02, -3.2349e-02,  5.8296e-03, -4.9801e-03,  6.2227e-04,
        -5.6485e-03,  1.2309e-03, -2.7299e-04, -1.7992e-02,  4.7047e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4443e+00,  1.8608e+02, -2.2450e+00, -4.3858e-01, -1.2111e+00,
        -2.7959e-01, -9.0110e-01,  4.3360e-01, -1.1642e-01,  4.4256e-03,
         1.0600e+00, -5.3598e-01, -4.7783e-02,  1.3407e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3400e-01,  2.8942e+01,  1.6683e-01, -2.0557e-02, -3.2508e-02,
        -3.3800e-02,  2.5459e-02,  1.7185e-02, -2.7269e-02,  7.9256e-03,
        -4.3405e-02,  9.9348e-03, -1.8950e-02,  4.3130e-02, -9.6262e-03,
        -1.0427e-01, -5.2380e-03, -1.5815e-02, -3.9352e-02,  1.0872e-02,
        -2.2272e-02,  1.4560e-02,  1.9512e-03,  1.8977e-02, -2.3631e-02,
        -4.1877e-02, -3.7685e-03, -1.1008e-01, -8.7834e-03, -1.5498e-01,
        -4.7512e-02,  1.0428e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5178e+00, -1.8661e+02,  2.9409e-01, -1.3463e-02,  1.6409e+00,
        -4.7426e-03,  4.1102e-01,  8.3885e-01,  3.1213e-01, -8.8389e-01,
         2.7893e-01,  3.9321e-01,  3.4483e-01,  4.3898e-01, -1.0490e-01,
        -3.7602e-02,  3.4746e-01,  4.3149e-01,  5.6489e-01,  2.9436e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9422e-02,  1.4630e+00,  3.2699e-02, -2.6553e-04,  5.5449e-04,
        -6.4342e-03,  4.3051e-03, -3.3525e-03,  2.3481e-03,  3.2371e-03,
         9.4541e-05,  1.5472e-03, -1.4266e-03, -1.5005e-03, -6.4120e-04,
        -6.7473e-03, -1.3175e-03, -2.2313e-03, -3.1652e-04, -4.8949e-04,
         1.1998e-03, -2.9674e-03, -1.3730e-04,  5.1090e-03, -2.8333e-05,
        -5.6089e-03, -7.5659e-03,  2.6660e-03, -4.9048e-03, -2.2374e-03,
        -6.5102e-04,  7.2538e-04,  9.8556e-04,  4.9220e-03,  1.8792e-03,
         2.9971e-03,  6.3405e-04, -2.9195e-03,  2.3812e-03, -9.9912e-04,
        -7.5204e-04, -9.9077e-04,  1.3842e-03, -4.5161e-03, -1.2287e-03,
         3.2669e-04,  1.8407e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2087e-02,  5.0739e+01, -4.3121e-01,  3.0246e-01,  4.7875e-01,
         1.3443e-01, -1.4848e-01, -2.3963e-01, -1.3095e-01,  8.5443e-02,
        -2.3827e-01, -1.0159e-01, -6.5676e-02,  2.0741e-02,  3.6370e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9589e-01,  4.6607e+01, -3.0742e-01,  2.8025e-02, -5.4160e-02,
         9.9831e-02,  5.4192e-02, -1.5898e-02,  7.7930e-02,  6.4421e-02,
         8.3538e-02,  1.4333e-01,  2.0778e-02,  1.0790e-01,  3.1345e-02,
        -4.0750e-02, -5.9569e-02,  4.9186e-02,  1.5363e-01,  2.8906e-03,
        -1.4185e-02, -1.0104e-01,  9.0213e-03,  4.2742e-02,  4.6641e-02,
         1.1962e-01,  1.1259e-02,  6.3226e-01, -4.0433e-03,  2.1703e-02,
         1.2420e-01, -3.3309e-02,  8.5682e-02,  1.3178e-02,  3.9574e-02,
         3.2349e-04, -2.7403e-02,  1.3991e-02, -1.1467e-01,  2.4934e-02,
        -1.3792e-03,  1.1628e-02, -7.3983e-02,  3.2170e-02, -8.8974e-04,
         9.6128e-02,  5.4379e-02,  6.8802e-02], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-6.3683e-03,  7.6597e+00, -7.5044e-03, -2.9850e-02,  6.3503e-02,
         3.7669e-02,  1.2849e-02, -1.5505e-02, -1.6140e-02, -6.4818e-02,
        -3.1126e-03, -3.6117e-02, -2.3483e-02,  3.5696e-03, -1.2914e-02,
        -3.2796e-03, -3.8914e-02,  9.0431e-03, -3.5154e-03,  1.3017e-02,
         5.1509e-03,  2.2367e-02,  3.2749e-03, -6.0399e-03, -3.3440e-03,
        -1.3769e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8793e-03,  6.4239e+00, -2.7904e-02,  1.7042e-02,  2.4947e-02,
        -7.1451e-03,  1.5037e-02,  2.5654e-02,  1.1229e-02, -3.5075e-03,
         4.9464e-03, -4.6605e-03,  7.4032e-03,  1.1949e-02,  7.4498e-03,
        -1.9825e-02,  5.6314e-02,  5.7357e-03,  8.4685e-03, -3.8370e-05,
         2.3226e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5348e-04,  1.7447e-01, -5.0378e-04, -8.0782e-04, -3.9819e-04,
        -1.4007e-03, -1.0995e-04,  3.1680e-05,  3.1723e-04,  1.2769e-04,
        -4.7189e-05,  1.3672e-04,  6.2264e-06, -5.7648e-04,  4.2842e-04,
        -7.1581e-04, -1.6694e-04, -6.7933e-04, -2.1595e-05,  5.8840e-04,
         3.0856e-04,  1.7189e-05, -5.4678e-04,  6.8308e-05, -9.4980e-05,
        -3.6290e-04, -1.1267e-04,  1.6315e-04,  2.2292e-04, -1.1036e-04,
        -8.5753e-04, -5.0268e-05,  1.4989e-04,  4.6680e-05, -1.3092e-03,
        -4.4459e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6068e-01,  1.7709e+01,  3.1025e-02,  1.2128e-01, -5.0348e-02,
         1.7836e-01, -6.1293e-02,  1.0919e-01, -3.4833e-02, -2.0800e-02,
         1.4437e-03,  2.3745e-02,  2.3092e-02, -7.2393e-02, -1.0105e-01,
        -3.9597e-02, -1.2452e-02,  7.0184e-03,  3.2145e-02,  4.4932e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9970e-03,  4.2363e+00,  9.5443e-03, -4.3982e-03, -6.1491e-03,
        -5.1728e-03,  1.0132e-02,  7.4158e-03,  8.7836e-03, -5.6335e-03,
        -2.2900e-02, -1.8219e-02, -8.2167e-03, -8.8770e-03,  1.0649e-03,
        -4.8912e-03,  6.7467e-03, -9.3008e-03,  1.0263e-02,  1.2920e-02,
         2.0627e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3916e-01,  8.8044e+01, -8.0721e-01,  3.8262e-01, -1.4258e-01,
        -5.1279e-02,  8.9326e-02, -9.8051e-02,  1.1013e-01,  3.5431e-01,
        -1.1290e-01,  3.6741e-01, -1.9388e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6165e-01,  3.8783e+01,  7.6367e-01, -3.6763e-02,  1.4104e-01,
         4.7679e-02,  3.1499e-02, -1.3538e-02, -6.7906e-02, -4.3154e-03,
         3.2195e-02, -2.1298e-01, -1.9288e-02,  4.5271e-02,  1.1827e-01,
         7.7044e-02,  3.4374e-03,  6.1176e-03, -6.8753e-02, -8.0301e-02,
         3.9568e-03,  3.4003e-02, -3.6839e-02, -4.8665e-02,  9.0197e-03,
         2.2838e-02, -1.1642e-01,  1.4407e-02, -1.0095e-02, -3.2917e-02,
         2.8604e-03, -3.8070e-02,  1.5220e-01, -3.8670e-02,  7.8648e-04,
         1.2792e-01,  2.6200e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1238e-02,  4.1094e+01,  2.7782e-01, -5.6310e-02,  4.6492e-02,
         9.3406e-02,  1.2400e-01,  1.0700e-02,  2.5671e-02,  9.4150e-02,
        -8.5942e-03,  4.7839e-02,  4.1450e-02,  1.5691e-01,  4.4060e-02,
         3.6575e-02,  4.9294e-02, -1.4356e-02,  2.0955e-02, -1.5267e-01,
         3.2882e-02,  5.1744e-02, -6.7577e-02, -2.8156e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7030e-01, -1.2410e+02,  4.4716e-01, -9.0690e-01, -1.6552e-01,
        -1.1758e-01,  1.9282e-01, -7.3545e-01,  1.0414e-01,  2.8985e-02,
        -1.3394e-01,  5.4776e-01, -6.7317e-01, -1.6569e-01, -2.5362e-01,
        -4.9640e-02, -2.6429e-02,  2.4499e-01,  3.8566e-02, -5.8744e-01,
        -5.4976e-03,  1.6152e-01,  1.5019e-01,  2.4081e-02, -7.5037e-02,
         8.5357e-02,  6.3072e-02,  4.1872e-01, -9.3088e-02, -1.4797e-01,
         6.2330e-02,  5.0797e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3174e-02,  6.2941e+00, -2.8252e-02, -4.7974e-02, -1.0942e-02,
        -4.4722e-02,  5.3300e-03, -2.0686e-02,  2.1330e-02, -2.7028e-02,
        -1.2050e-02, -5.4505e-04, -1.9657e-03,  4.5250e-04, -4.0917e-03,
        -1.0581e-05, -9.0743e-03, -2.2712e-03,  1.4559e-02, -5.8804e-03,
        -2.4813e-02, -3.0556e-02,  3.8348e-03, -7.0898e-03, -3.5080e-03,
         9.6569e-03,  3.7014e-02, -4.0505e-03,  7.2695e-03, -1.6427e-02,
        -2.0825e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0483e+00,  1.9455e+02, -4.2032e+00, -6.5189e-02,  9.1770e-01,
         4.1969e-01,  7.2136e-01, -1.0516e+00,  1.0344e-01,  4.8297e-02,
         2.1519e-01,  2.8827e-01,  1.1236e+00,  1.0116e+00,  8.0804e-01,
         1.1780e+00,  4.8566e-01,  3.0895e-01,  3.6267e-01, -6.9623e-01,
        -4.5728e-01, -5.5686e-01, -3.2050e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6809e-01,  8.7633e+01,  3.6026e-01, -4.7099e-01, -7.6819e-02,
         1.8628e-02,  7.6165e-02, -3.3472e-01, -5.5579e-02,  5.7363e-02,
         1.4238e-01, -3.2861e-01, -5.8984e-02,  1.6038e-01, -2.6173e-02,
         7.0980e-02, -9.5917e-02, -1.9424e-02,  7.1110e-04, -1.1519e-01,
        -1.7240e-01,  5.9758e-02, -1.2928e-01, -1.1414e-02,  5.6221e-02,
         2.0997e-01,  4.3943e-02,  5.8905e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.3422e-02,  6.1750e+00,  5.0618e-02,  6.9969e-03,  8.1037e-03,
         8.3114e-03,  1.0816e-02,  8.9036e-03, -3.9839e-03, -1.8982e-02,
         5.4613e-03,  9.6323e-03,  2.1474e-02,  4.8392e-03, -2.7866e-03,
         4.8165e-03,  5.6465e-03,  1.1623e-02,  2.2771e-03,  1.1321e-02,
        -1.5943e-03,  4.9004e-03, -1.3502e-02, -1.1452e-03,  1.4691e-03,
        -4.8171e-03,  4.7538e-03, -6.7557e-02, -4.6736e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5521e-01,  1.3350e+02,  1.2672e-03, -1.1307e+00,  3.2519e-01,
         6.4653e-01, -4.3212e-01, -1.3240e-01, -8.2031e-02,  8.5052e-02,
        -2.7659e-01, -3.4367e-01,  5.6883e-02,  3.8406e-01, -1.0273e-02,
        -2.5013e-01,  4.7706e-01,  6.9367e-02,  4.2246e-01, -3.2595e-02,
         3.1084e-02,  3.0627e-01,  2.9758e-01, -5.3019e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8321e-02,  1.7968e+01, -1.7247e-01, -2.7480e-02,  8.5245e-02,
         2.1065e-02,  1.8394e-02,  9.4659e-02, -5.3535e-02, -1.0956e-02,
        -1.7035e-02,  9.1770e-02,  9.7353e-02,  5.7111e-02,  1.1241e-02,
         5.9106e-02,  6.5907e-03,  1.1149e-02,  2.8037e-02,  1.1712e-02,
         4.2501e-02, -1.5707e-02,  2.6775e-02,  5.2071e-02,  3.3053e-02,
         1.8390e-02, -2.0691e-02,  1.8710e-02, -8.4002e-03, -7.3355e-03,
         2.7437e-02,  5.6153e-02, -3.8573e-02,  4.9918e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8322e+00, -2.8232e+02, -6.7722e-01,  2.2372e+00,  2.0523e-01,
         3.0349e-01, -6.9758e-01,  3.9260e-01,  4.1860e-01, -8.2714e-01,
        -1.9171e-01,  1.3775e-01, -4.4259e-01,  8.0844e-01, -3.0872e-01,
        -3.1183e-01,  1.1928e+00,  6.7732e-01,  3.2395e-01,  2.6434e-01,
        -7.0438e-01, -1.6124e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4306e-02,  5.9256e+00, -5.6403e-02, -4.2420e-02, -2.3214e-02,
        -7.2630e-03, -1.6344e-02,  2.4048e-02,  1.4385e-02,  1.2533e-02,
         3.7951e-02, -8.2606e-03,  7.0093e-03, -1.6334e-02,  1.2451e-03,
         1.1230e-02,  9.2612e-03, -2.7692e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1474e-02,  2.4090e+00,  5.7243e-03, -6.9855e-04,  4.2992e-03,
         8.7634e-04,  6.9225e-03, -5.0355e-05,  2.0823e-03,  4.9326e-03,
         9.9167e-04,  4.9106e-03, -6.3393e-03,  4.7727e-03, -7.5784e-03,
        -1.1711e-03,  5.9184e-03, -2.2850e-03, -1.9427e-03,  9.6895e-04,
         1.7991e-03, -8.0544e-03,  3.1101e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9062e+00, -3.0956e+02, -2.8725e-01,  2.6040e-01,  1.0006e+00,
         8.1646e-02,  9.8057e-01,  7.3304e-02,  2.1796e+00,  1.3713e+00,
         1.5533e-01,  1.6234e-01,  2.7075e-01, -1.0893e+00,  1.6211e-01,
        -6.8109e-01,  3.9361e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4756e-01,  2.1173e+02,  1.7499e-01, -2.0305e-01,  3.2284e-01,
         4.0995e-01,  3.2049e-01,  1.6983e-01, -5.8198e-01, -3.2551e-01,
         7.7239e-02, -2.5130e-02, -7.3288e-01,  5.4702e-01,  5.5905e-01,
         3.5216e-01,  1.3304e-01, -9.7956e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6845e-02,  1.4899e+01, -2.3761e-01, -3.6810e-02,  1.6033e-03,
         1.1207e-02,  1.2873e-02,  2.9173e-02,  9.7245e-03, -5.4146e-02,
         6.3985e-02, -8.4735e-02,  2.2604e-02, -6.5961e-03,  6.0399e-03,
         1.0360e-01,  1.7069e-02,  5.6238e-02,  4.4157e-03, -1.4230e-02,
         2.0082e-02, -4.5144e-02,  1.2083e-02,  3.7637e-02,  7.3372e-02,
        -1.1808e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4371e-02,  2.4100e+00, -1.9728e-02, -9.5887e-03, -1.3747e-03,
         7.6332e-03, -5.8613e-03,  1.6494e-03,  9.1267e-03, -7.4216e-03,
        -5.7091e-03,  2.5477e-03, -3.7629e-03, -3.9691e-03, -6.8377e-03,
        -5.2111e-03, -9.5484e-04,  8.6909e-04,  1.4255e-03, -4.0633e-03,
         3.6476e-03,  4.8179e-03,  2.9014e-03, -3.0788e-03, -7.3694e-04,
        -2.7736e-03, -4.5233e-03, -7.9268e-04, -2.3315e-03,  1.1077e-02,
        -2.2468e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2565e-03,  9.6024e-01, -5.3384e-04, -4.6699e-04, -8.8015e-03,
        -6.0200e-03, -1.3542e-03,  1.8451e-03, -2.6505e-03, -3.8909e-03,
        -2.3500e-03,  1.6511e-04, -3.9822e-03, -1.3480e-03, -6.9509e-03,
        -1.3584e-03,  3.4354e-03, -4.6478e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1366, 12.5573,  0.1964,  0.0406,  0.0650, -0.0448, -0.0205, -0.0632,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-7.0597e+00, -2.2928e+02, -2.0638e+00,  6.3705e-01, -5.6816e-01,
         3.1557e-01,  3.1068e-02,  1.3462e-01,  2.5935e-01, -5.9055e-01,
         4.1363e-01,  4.5461e-01,  2.0967e-01,  4.6855e-01, -4.0565e-01,
        -1.6508e-01, -8.6602e-02,  5.8723e-01, -3.3355e-01,  1.3827e-01,
        -9.3938e-02,  1.3197e-01,  5.7296e-01,  3.9580e-02,  2.9524e-02,
        -1.4236e-02,  2.6246e-01, -1.2485e-01, -2.0724e-01,  6.1362e-02,
        -2.4359e-01, -9.3720e-03, -2.6946e-01, -2.3528e-01, -5.8760e-01,
         1.3584e-01, -2.2544e-01,  4.6517e-02, -1.6069e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2270e-01,  1.9958e+01, -1.5158e-01, -1.5428e-01,  1.6520e-02,
        -2.7514e-02, -1.8318e-02, -7.1505e-03, -5.1566e-02,  3.0520e-02,
         4.0105e-02,  4.9122e-03,  3.9215e-02, -4.7857e-02, -2.1906e-02,
         5.9530e-02, -2.9719e-02, -1.5335e-02, -3.2005e-02, -6.1883e-04,
         5.0887e-03, -3.5788e-02, -2.5465e-02, -7.4563e-04,  2.0362e-04,
        -1.0189e-02,  5.8274e-03,  8.3255e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2988e+00,  1.4553e+02, -4.0453e-01,  4.1682e-01,  5.1690e-01,
        -2.5072e-01,  5.7681e-01,  2.6393e-01,  4.3428e-01, -5.7317e-02,
         7.8713e-01, -4.5413e-01,  3.6225e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2273e+00,  8.4037e+01, -4.9556e-02,  1.3884e-01, -2.0380e-01,
         1.0362e-02, -1.8995e-01, -8.8606e-03, -2.8486e-01, -1.1460e-02,
        -3.9654e-02,  2.6448e-04, -2.3544e-01,  6.3503e-02,  3.0847e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6637e-02, -9.4620e+01, -5.5608e-01,  2.3355e-01,  1.1932e-02,
        -3.3918e-02, -1.0840e-01,  3.1910e-02, -3.4611e-01, -2.5415e-01,
        -1.2007e-01, -1.3666e-01, -1.8647e-01, -2.4385e-01, -1.4159e-01,
        -3.1191e-02, -7.0183e-02, -1.6258e-01, -9.8641e-02, -6.1890e-02,
        -4.1015e-02,  8.1260e-02,  2.0583e-02, -3.6419e-02,  3.0821e-02,
        -1.8016e-01, -3.6974e-02, -4.4444e-01, -4.6856e-02, -1.6261e-01,
         4.0754e-02, -8.4625e-02,  3.5012e-03, -1.4584e-01, -3.5465e-02,
        -1.6149e-01, -4.7304e-02, -7.2821e-02, -6.6186e-02,  4.7762e-02,
        -1.2991e-02, -2.5028e-02, -6.0108e-02,  6.1153e-02,  3.2558e-02,
        -1.3460e-01,  6.4841e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7910e-01, -1.8054e+02, -2.1063e+00, -3.7183e-01, -3.9979e-01,
        -2.8839e-01, -5.8084e-01, -3.4108e-01,  5.6698e-01, -4.0089e-01,
        -1.3520e-01,  2.8733e-01, -1.8814e-01, -2.9252e-02, -1.2006e-01,
        -7.9264e-02,  9.6269e-02, -5.5016e-02, -2.3293e-01, -4.1024e-01,
        -2.6793e-02, -1.0067e-02,  5.8294e-02, -3.2994e-01, -1.9434e-01,
        -1.2503e-01,  3.9984e-01, -1.0577e-01, -1.2952e-01,  2.1066e-01,
        -2.7861e-01, -1.9129e-01, -3.9705e-01, -2.2795e-01, -1.3305e-01,
        -1.1011e-01, -7.4402e-02, -9.7510e-02, -3.2610e-01,  6.4024e-02,
        -6.8717e-03,  8.9067e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4722e+00, -8.4922e+01,  8.9851e-04, -5.8773e-02, -2.1573e-01,
        -2.1277e-01, -2.9246e-01, -1.4799e-01, -1.1632e-02, -2.3423e-01,
         1.0703e-01,  1.1258e-01,  2.0398e-01,  7.1583e-02, -8.9147e-02,
         1.9710e-01, -3.4077e-01,  8.9364e-02,  3.2799e-02,  1.0482e-01,
        -2.9759e-02,  2.5115e-02,  3.3023e-01,  5.7410e-02, -2.5517e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0817e+00,  1.2357e+02,  7.9177e-01, -3.5769e-02, -1.6699e-01,
        -4.1363e-01,  8.7141e-01, -8.4204e-02, -1.6273e-01,  7.4903e-02,
         1.4640e-01, -1.6605e-01, -3.4125e-02, -2.7525e-01,  2.5118e-01,
         3.2347e-01,  1.5072e-01,  9.7762e-01, -2.2310e-01, -3.1427e-01,
        -2.1418e-01, -2.3777e-02, -1.8743e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3853e-01,  8.2870e+01,  2.9588e-01,  4.8800e-03, -1.0411e-01,
        -2.4049e-01,  1.1300e-01,  4.8187e-02,  8.7652e-02,  6.0289e-01,
         1.8701e-01,  1.5299e-02,  2.4862e-01,  1.7469e-01, -6.0727e-02,
         1.7429e-01, -5.8697e-02,  4.1407e-01, -2.9742e-01,  1.1378e-01,
        -8.2232e-05, -1.1988e-01,  6.5766e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9351e+00,  1.2443e+02,  9.6477e-01, -8.5593e-01,  1.8118e-01,
         1.8755e-01,  9.8622e-01, -8.4119e-01,  1.8637e-01,  1.1503e-01,
         4.5652e-01, -4.5326e-01, -3.2479e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2818e-01, -9.3786e+01, -1.6744e-01, -5.2019e-01, -2.3976e-01,
        -2.9052e-02, -5.3049e-02, -1.9162e-01,  2.4765e-01, -8.9350e-04,
         3.8452e-01, -4.6773e-01, -1.2362e-01,  5.9590e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4618e+00, -2.3505e+02, -4.9683e-01, -2.8577e-01,  5.4326e-01,
        -4.7225e-01, -3.5931e-01,  3.8468e-01, -1.9648e-01, -6.0989e-01,
         5.2800e-02, -1.8750e-01, -1.7373e-01, -2.7352e-01,  4.3669e-01,
         5.8378e-02, -2.7325e-01,  1.0538e-01, -1.3136e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-4.9083e-01, -3.5738e+01, -2.3180e-01,  3.3869e-02, -2.4471e-02,
         1.4501e-02, -4.0356e-02,  8.3057e-03, -4.9540e-02,  1.7663e-02,
         4.5147e-02, -1.7204e-01, -5.5953e-02, -8.8097e-02,  1.2716e-01,
         7.7572e-02, -7.4071e-02, -8.0150e-02, -8.7947e-02, -1.9476e-01,
        -1.0334e-01, -1.9748e-02,  1.5184e-02, -9.3296e-02,  1.5482e-01,
         1.7217e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1294e-01,  3.2356e+00,  4.4210e-03, -1.7463e-02,  3.7236e-03,
        -1.4333e-02, -2.9367e-03, -1.2426e-03, -1.3204e-02,  1.3809e-02,
         9.4888e-03, -1.7478e-02,  1.0954e-02,  2.2376e-03,  2.1728e-03,
         1.3333e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1556e-01,  5.2576e+00,  1.0449e-02,  7.8745e-02,  1.0485e-02,
        -7.0397e-05,  8.9875e-03, -8.6087e-03,  3.2027e-02,  2.7470e-02,
         1.9863e-02, -2.0504e-02, -2.2578e-03, -2.2973e-02,  2.8378e-02,
        -1.6106e-02, -7.5038e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([   1.9053, -102.4779,    0.7929,   -0.8307,    0.1447,   -0.6265,
           0.1167,   -0.1439,   -1.4881,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2072e-02,  7.6055e+01, -6.8033e-02,  2.5506e-01, -1.0104e+00,
         1.0626e-01,  1.7475e-01,  2.0816e-02,  4.8702e-03, -1.2403e-01,
         3.3867e-02, -2.5634e-02,  1.6638e-01,  4.1235e-02,  6.4332e-02,
         9.1918e-03,  2.6921e-02, -3.0294e-02,  4.6576e-02, -2.9093e-02,
        -9.2173e-02, -9.0141e-02,  5.2684e-02, -1.0703e-01, -1.3262e-02,
        -2.2126e-01,  2.7649e-01, -1.6219e-01, -7.8572e-02,  1.5208e-02,
        -7.6007e-02, -5.5099e-02,  1.1544e-01, -4.0712e-02,  1.2927e-01,
        -2.2220e-02,  4.9447e-02, -9.8704e-02, -1.6710e-01, -1.1020e-01,
         8.4841e-03,  5.5001e-02, -9.8060e-02,  1.2566e-02,  1.4853e-01,
         5.1149e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8732e-02,  7.2964e+00, -1.0335e-01, -3.6236e-02, -9.5071e-03,
        -1.8569e-02, -1.0310e-02, -2.0725e-02, -3.6838e-02,  1.9449e-02,
        -3.4680e-03,  1.9443e-02, -4.2296e-02, -5.5197e-03,  1.1581e-02,
        -4.5463e-04, -4.7149e-04, -2.9369e-03, -9.3082e-03,  6.1129e-03,
        -1.0558e-02, -1.8638e-02,  2.5688e-03,  8.8365e-03,  3.3412e-02,
        -1.7812e-02, -2.1805e-02,  4.0138e-03,  5.4559e-02,  2.2038e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3930e-01, -4.7491e+01,  2.5036e-01,  1.9521e-02,  1.4233e-01,
         3.1448e-01,  6.2636e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5168e-01,  4.8344e+01, -3.9579e-01,  1.9092e-01,  1.8555e-02,
         3.0923e-01, -2.7101e-01,  1.4535e-01,  2.6844e-02,  6.7583e-02,
         1.8670e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6623e+00,  1.5124e+02,  7.4995e-01, -5.1225e-01, -6.6932e-01,
         1.4714e-01,  9.4529e-01, -1.7146e-01,  4.2789e-01,  1.0066e+00,
        -5.9232e-01,  7.9926e-01,  7.8382e-01, -1.6054e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5139e-01,  2.1737e+02,  5.8625e-01, -3.8501e-01, -2.5542e-01,
         1.2231e-01,  6.1079e-01, -4.5590e-01, -5.2297e-01, -1.0321e+00,
        -1.1253e-01,  3.4789e-01, -4.0060e-02, -6.6602e-01, -1.9434e-01,
         8.7539e-01,  1.1301e-01,  3.1635e-01,  3.6522e-01,  3.0694e-01,
        -1.9481e-02, -6.1572e-01, -3.4295e-01,  5.6615e-02,  1.0939e-01,
         3.7370e-01,  1.2333e-01,  1.1841e-01,  2.2882e-01,  2.9164e-01,
        -1.2584e-01,  9.5171e-01, -2.8319e-01,  1.3358e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8738e-03,  2.7104e+01,  4.2009e-02,  1.6968e-01, -4.1291e-03,
         3.3489e-02, -9.6416e-04, -1.8700e-01, -4.0651e-02,  8.4432e-02,
        -1.9022e-01,  4.1403e-02, -1.8809e-03,  3.9114e-02,  1.7436e-01,
        -2.7847e-02, -5.9059e-02, -1.9766e-01, -8.6663e-02,  1.8774e-02,
        -1.4837e-02,  3.6084e-03, -1.6810e-01, -3.8758e-02,  3.4974e-02,
         3.4453e-02, -2.1605e-02, -5.6674e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3024e-02,  5.6181e-01, -2.0051e-03, -8.5143e-04,  6.2588e-04,
         7.7510e-04, -4.2417e-04,  7.4446e-04, -8.7668e-05,  7.6177e-04,
        -1.6681e-04, -1.7301e-03, -9.8603e-04,  2.2071e-04, -2.9980e-04,
        -1.5757e-03, -1.6584e-03,  1.2379e-03, -6.7514e-04, -4.2532e-04,
        -3.5381e-04,  1.7403e-03, -1.5701e-04,  2.2490e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.2181e+00,  1.2798e+02, -8.1147e-01,  6.4527e-01,  9.7753e-02,
        -5.5623e-01,  6.9505e-02, -4.1419e-01, -9.5715e-02, -2.5689e-01,
         1.1582e-01, -1.4786e-01, -3.7169e-02, -2.0572e-01,  6.5571e-02,
         1.9550e-03, -1.0307e-01,  2.2243e-01,  5.0724e-02,  8.1055e-01,
        -2.6106e-01, -1.6931e-01,  4.7546e-01,  4.1017e-02, -1.9695e-01,
         5.0310e-01, -4.1272e-03,  6.9015e-02, -1.4226e-01, -2.3161e-01,
        -2.2508e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1483, 63.1080, -0.5084,  0.3969, -0.1151, -0.5081,  0.0804, -0.2066,
         0.3673,  0.2436,  0.0791, -0.5520,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1492e-03,  1.0982e+01,  1.7154e-03, -5.5057e-02,  2.2670e-02,
         1.3888e-02, -9.6895e-03,  2.5037e-02, -3.3631e-03,  5.4822e-02,
         3.2079e-02,  6.7046e-03, -5.5962e-02, -4.7869e-02,  1.4115e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2212e+00,  2.0005e+02,  1.8405e+00, -8.1155e-01, -5.4275e-01,
         4.1059e-01, -4.1233e-01, -2.5760e-01,  1.6944e-01, -2.3129e-01,
        -2.6045e-01,  5.2042e-02,  4.0565e-02,  4.5473e-02,  2.4758e-01,
        -6.4528e-02,  1.7888e-01, -1.4795e-02, -3.9498e-01, -8.3979e-01,
         4.8238e-02,  1.0019e-01, -8.4782e-02, -5.8969e-01, -7.9776e-02,
         3.4456e-01,  1.4881e-01, -3.1695e-01, -1.1981e-01, -8.4741e-02,
         2.8466e-01, -2.4071e-01,  4.2891e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5496e-02,  2.9221e+01, -2.0710e-01, -2.1274e-01,  3.4373e-02,
         6.0736e-02,  4.5966e-02,  2.5641e-02, -2.1919e-02, -1.4468e-02,
        -8.2832e-02, -4.0622e-03, -2.1968e-02, -1.3080e-02,  1.5956e-01,
        -4.0858e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9073e+00,  1.9557e+02,  1.0402e+00,  6.4175e-02, -2.6073e-01,
        -1.7985e+00, -2.9265e+00,  3.5143e-01, -1.5912e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7350, 28.7009, -0.0778,  0.2426,  0.6706,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6326e-01,  1.3508e+02, -6.0481e-01, -9.8107e-01, -4.5932e-01,
        -2.9663e-01, -5.3006e-01, -2.3262e-01, -2.9864e-02, -3.8951e-01,
        -2.7848e-01, -3.0942e-01, -4.8948e-01,  4.8043e-01, -4.7886e-02,
        -3.6030e-02,  1.8261e-01, -1.4975e-01, -2.0720e-01, -1.2025e-01,
        -2.4013e-01, -4.2349e-02, -1.3194e-01, -1.5005e-01, -9.2939e-03,
        -1.1284e-01, -6.3692e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([  0.4698, -99.9647,   0.3981,   0.6876,   0.8539,   0.2651,   0.6201,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2998e+00, -5.0865e+01, -1.2731e-01,  9.4639e-03, -5.8748e-02,
        -1.5861e-01,  1.3581e-01,  9.6392e-02, -2.2212e-01, -5.8944e-01,
        -9.0836e-02, -3.9900e-01,  6.4511e-02, -5.4268e-02,  1.4229e-01,
         2.0225e-01, -4.2215e-02,  2.4294e-02, -4.1492e-02, -6.2035e-02,
        -1.7067e-01, -4.5807e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1542e-02,  1.4380e+01,  1.1666e-01, -3.4739e-02,  1.3582e-01,
        -5.2899e-02, -3.1336e-02, -6.4657e-03, -2.4521e-03, -1.2016e-01,
         8.3594e-03, -2.6626e-02, -9.0338e-02, -9.8100e-02, -1.6231e-03,
        -2.5327e-02, -9.4345e-03, -4.2652e-02, -2.9490e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6441e-02,  3.2590e+00, -1.3281e-02,  3.8540e-05, -4.0856e-03,
         2.0287e-03,  6.3983e-03, -3.2867e-03,  8.0401e-03, -3.3831e-03,
         3.6096e-03, -5.6166e-03, -1.9719e-04,  4.6981e-03,  3.4498e-03,
        -2.3238e-03, -9.8667e-03, -5.5733e-03, -2.0194e-03,  2.0970e-03,
        -4.7507e-03, -9.9360e-03, -3.5474e-03,  2.0193e-03,  1.3864e-03,
         2.0971e-03, -4.9434e-03, -5.5248e-03,  3.4296e-03,  2.6680e-05,
         4.1868e-03,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-5.3958e-01, -1.5821e+02,  1.7531e+00,  4.7994e-01,  7.4436e-01,
         7.9295e-01, -1.0818e-01,  1.0139e-01, -1.2939e-01,  2.1903e-02,
         3.4908e-01,  6.8031e-01,  1.8600e-01,  2.1020e+00,  3.0333e-01,
        -5.5418e-01,  5.2324e-01, -2.9825e-01, -6.1091e-01,  1.0422e-01,
         1.5551e-02, -1.5686e-01,  2.3684e-02, -2.9037e-01,  2.3705e-02,
        -4.0922e-03, -1.6403e-01, -4.2683e-01, -1.4567e-01,  2.5470e-03,
        -2.8070e-01,  1.1671e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0056e+00,  1.9210e+02, -1.2103e-02, -4.1792e-01,  3.2141e-01,
         1.1530e-01, -1.9929e-01,  2.2502e-01,  3.7013e+00, -1.4950e-01,
        -3.3307e-01,  3.7606e-01,  4.8884e-01, -2.6085e-02, -7.1033e-02,
         1.2559e-01,  1.8808e-01,  6.0428e-02, -8.6766e-01,  3.5492e-02,
        -4.9187e-02,  1.6448e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1153e-02,  3.5925e+00, -2.4811e-02, -1.8763e-02, -2.0277e-03,
         1.0912e-02,  3.8069e-03,  8.6047e-03,  1.4756e-03,  6.2452e-03,
        -4.5496e-03, -5.9663e-04, -2.4444e-04,  2.5222e-03, -7.0418e-03,
         4.3924e-04, -2.4569e-03,  2.5977e-02, -9.8311e-03, -9.4644e-04,
         7.4529e-05, -1.2667e-02,  4.3101e-03, -1.0509e-03, -5.0693e-03,
         5.9521e-03,  2.9744e-03, -7.5677e-03,  4.0048e-03, -5.0021e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3590e-05,  4.5147e+00, -4.2472e-02,  5.7307e-03, -1.5508e-02,
        -2.5852e-02,  1.2929e-03,  3.5655e-03, -8.1846e-03, -8.8004e-03,
         4.5201e-03,  1.2635e-02, -1.1648e-02, -2.1278e-03, -1.4056e-03,
        -7.8603e-03, -2.5587e-02, -3.7902e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3016e+00,  1.6256e+02,  2.4448e+00, -3.2350e-01, -2.7280e-01,
        -4.7295e-01,  7.0713e-02, -1.4639e-02, -6.3733e-01, -1.7763e-01,
        -1.8827e-01, -7.2614e-02,  1.4324e-01, -3.9103e-02,  2.5476e-01,
        -2.4497e-02,  1.6977e-01, -5.6770e-01, -4.0861e-01, -5.6632e-02,
        -2.2839e-01, -7.8749e-01,  7.3740e-02,  1.6153e-01,  1.5278e-01,
        -7.0978e-03, -3.6260e-01,  6.9081e-02, -5.4729e-03,  8.0877e-02,
         1.9940e-01, -2.6012e-01,  1.5149e-01, -2.2781e-02,  1.3755e-01,
         7.2478e-01,  5.5562e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9006e-02,  2.5221e+00,  2.1632e-02,  1.5269e-03, -1.3552e-03,
        -4.2796e-04, -4.8820e-03, -3.0970e-03, -4.5210e-03,  8.3481e-04,
        -2.4828e-03, -7.3832e-03, -1.2594e-02,  3.3518e-03,  1.5212e-02,
         4.6885e-03, -1.8996e-04,  6.0071e-04,  3.8759e-03, -8.8162e-03,
         3.0430e-03, -4.5453e-03,  1.1227e-02, -2.5144e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1593e-03,  2.0898e+00, -1.3198e-02, -3.1559e-03, -3.5987e-03,
         2.9800e-03, -2.7856e-03,  3.5900e-03,  3.4883e-04, -2.5991e-03,
         7.8129e-04,  5.6572e-04,  5.4573e-04,  1.5344e-03, -2.5975e-03,
         1.4782e-03, -5.6748e-04, -1.7012e-03, -2.6710e-03,  1.8071e-03,
        -1.9088e-03, -1.0958e-03,  1.1796e-03,  3.1388e-03,  5.3541e-04,
         1.6099e-02,  2.1441e-03,  3.0243e-04, -8.2226e-04,  5.9916e-04,
        -6.9153e-04,  9.9013e-04,  1.7061e-03,  9.3447e-04, -1.3900e-03,
        -1.1026e-04,  2.3635e-03,  1.0860e-03, -6.6925e-04, -1.3289e-04,
         6.7281e-04,  5.1080e-04,  2.7501e-03,  5.7522e-03,  5.2379e-04,
         3.2544e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6012e-01,  2.7474e+01, -3.1220e-01,  1.1981e-02, -9.5350e-02,
        -3.2228e-02, -1.1747e-01,  3.9704e-02,  3.4959e-02, -5.0131e-02,
         5.8477e-02, -6.0886e-02, -8.6307e-03, -4.7431e-02,  3.7803e-02,
         3.8630e-03,  4.5436e-03,  2.3580e-02,  7.0627e-02,  1.5652e-02,
         6.3596e-02,  6.5494e-03,  1.9136e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4257e-02,  1.0738e+00,  7.1989e-03, -7.3212e-03,  3.0548e-03,
        -3.5849e-03, -1.3497e-03, -2.0011e-03, -1.4121e-03, -9.9087e-04,
        -5.2090e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0947e-02,  2.5385e+01,  8.7860e-02,  8.0084e-02,  1.4343e-02,
        -3.1630e-02, -7.5117e-02,  9.8487e-02,  1.2134e-01, -5.7192e-03,
        -4.0301e-03, -5.6124e-02, -4.3791e-03, -1.7957e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1013e-02,  2.3945e+01, -1.4629e-01, -2.6643e-01, -8.8674e-02,
         1.6338e-03, -7.1323e-02,  1.7740e-02, -3.0404e-02, -2.6751e-01,
         8.3460e-03, -1.7026e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6381e-01,  5.2864e+01,  2.0758e-01,  2.9067e-01,  3.0406e-02,
         1.6634e-01,  2.6221e-02,  7.7900e-02,  2.2302e-03,  1.5777e-01,
        -3.8474e-02,  4.1006e-02, -8.0610e-03,  1.5489e-01, -2.6924e-03,
         3.2506e-02,  8.5984e-03, -4.9878e-02,  1.0134e-01, -4.3418e-02,
        -2.3484e-02,  1.6135e-02,  1.1233e-01, -1.8699e-02,  7.3960e-02,
        -1.8532e-02,  2.6747e-01,  5.2213e-02, -1.5440e-03,  1.1995e-01,
        -7.0862e-02,  2.8463e-01,  2.1448e-02,  1.4105e-01,  5.4883e-02,
         1.4101e-02,  1.6435e-02,  6.5864e-02,  5.8606e-02, -3.5527e-02,
         3.6743e-02, -1.9688e-03, -2.2722e-02, -2.9939e-02, -8.8777e-03,
         1.6925e-01,  3.9386e-02, -6.8761e-04, -1.3263e-02, -4.9650e-02,
         1.2753e-02,  6.0381e-02, -1.3089e-02, -2.9929e-02,  5.4794e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.4439e-01, -1.6442e+01,  1.4578e-01,  4.7609e-02, -5.4800e-02,
        -1.1046e-02,  7.5242e-03, -4.0114e-03, -2.4937e-03,  4.8968e-02,
         1.0559e-01,  1.9598e-02, -7.3853e-02, -3.5343e-02,  2.6619e-02,
         5.1181e-03,  1.7310e-02, -7.7359e-02, -3.1558e-02,  2.3995e-02,
         2.1434e-02,  3.6723e-02, -9.0410e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1310e+00,  1.7344e+02,  1.1865e+00, -2.6753e-01,  1.5464e-01,
        -2.0551e-01, -5.3457e-01,  4.1127e-02, -7.2691e-02, -6.7954e-01,
        -1.3775e-01, -3.6606e-01, -2.5204e-01, -1.4541e-01, -1.6032e-01,
         4.3015e-02, -5.3263e-02, -3.1827e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6402e-03,  2.7779e-01,  6.7593e-03,  1.2144e-03,  1.3532e-04,
         1.3539e-04,  1.2494e-03,  8.6496e-04, -3.7497e-04, -1.3338e-03,
         1.4073e-04, -4.2789e-04,  1.8499e-03, -3.7440e-04,  2.6213e-03,
         1.6950e-04,  1.5031e-03,  1.2044e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1903e-01,  4.4250e+01,  4.3754e-01,  1.2512e+00,  1.2687e-01,
         7.5160e-02, -1.6293e-02,  1.0931e-02,  1.3415e-01,  4.8701e-02,
         7.4284e-02,  2.0397e-01,  2.2713e-01,  3.4913e-01,  1.2690e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8518e-01,  2.4613e+01,  1.2386e-01, -3.6027e-02,  7.9280e-02,
        -8.7908e-02, -1.5226e-01,  5.5423e-02, -9.9699e-03, -4.1525e-02,
         9.5692e-02, -4.3952e-02,  4.9870e-02,  1.7965e-02, -7.1768e-02,
         1.9832e-02, -1.0453e-01, -3.2499e-02,  2.6494e-02,  1.0383e-01,
         9.2714e-02, -6.9828e-02, -2.7514e-02,  8.2591e-03,  7.6812e-02,
         3.9452e-02,  4.8476e-02,  1.6982e-02,  1.3077e-01,  4.5465e-02,
        -2.8102e-02, -4.3048e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1651e-01,  1.1997e+01, -2.0902e-02, -6.6537e-02,  9.4025e-02,
         8.0409e-03,  1.8213e-02, -7.9434e-03,  3.4963e-02,  2.0592e-02,
        -7.1567e-03, -2.0843e-02, -4.7835e-02,  2.4201e-02,  1.1655e-02,
         7.4415e-03, -6.0540e-03, -7.8544e-05, -1.1381e-02,  2.3133e-03,
        -6.4619e-03,  2.5903e-02, -1.8405e-02, -2.8740e-02,  8.6136e-03,
        -2.9795e-05,  2.7910e-02, -1.8035e-02, -1.2573e-02,  1.5003e-02,
        -2.1109e-02,  2.4454e-02, -1.1050e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9596e+00,  1.6539e+02,  8.2417e-02, -2.1206e-01, -3.8495e-01,
        -4.2464e-01,  4.3545e-03,  2.2504e-01,  6.2578e-01,  4.6798e-01,
         2.7703e-02, -6.3682e-01, -6.5615e-02, -2.0634e-01, -3.3100e-03,
        -2.8077e-02,  2.9083e-01,  1.6611e-01,  2.5181e-01, -2.7004e-02,
         1.4240e-01, -2.0474e-01,  1.8630e-01, -8.3191e-02,  3.6242e-02,
         6.4115e-02,  9.5059e-03, -1.8834e-01, -2.9673e-01, -3.3978e-01,
         4.0742e-01,  6.9469e-02,  3.4932e-01,  7.0290e-03,  4.9753e-02,
         2.2588e-01,  3.9018e-02,  6.5460e-02,  1.0221e-01, -7.1935e-02,
         4.8422e-02, -1.6517e-01,  1.3365e-01,  3.7354e-03,  7.2834e-02,
         6.8044e-03,  6.6551e-02, -2.9340e-02,  9.7704e-02,  1.5459e-01,
        -8.1602e-02,  4.9891e-02, -8.9094e-02,  9.1491e-02,  8.1037e-02,
         1.9913e-01,  4.5370e-03, -1.6316e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1367e-01, -2.2942e+01,  2.1558e-01,  1.3736e-01,  4.2719e-02,
         1.8192e-02,  7.3762e-03, -3.1533e-03,  4.0273e-02,  1.1960e-01,
         1.7491e-02,  7.1931e-03, -6.4091e-02, -7.0591e-02, -1.0739e-01,
         1.5496e-02, -2.7243e-02, -3.8916e-02, -2.0443e-02, -5.9157e-02,
        -2.1102e-02, -7.5207e-02,  3.1869e-02,  7.0336e-04,  2.9202e-02,
         2.6167e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2628e-02,  4.1527e+00, -3.1759e-02,  2.7519e-02, -3.3202e-03,
         1.5989e-02,  1.4129e-02,  7.9999e-03,  2.8661e-03,  5.0168e-03,
         2.6661e-02,  1.6687e-03,  1.3871e-02,  1.5630e-02, -3.3458e-03,
         3.7540e-03, -1.6152e-03,  3.5365e-03,  3.4445e-03,  6.1626e-03,
        -3.6116e-03, -6.7247e-04, -4.4349e-03, -1.0064e-02, -1.0263e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3715e+00,  2.2295e+02,  3.0231e+00, -6.1730e-01,  2.8878e-01,
         2.7929e-03, -8.7762e-02, -8.3968e-01,  3.6616e-01,  5.6152e-01,
        -2.7079e-01, -1.1621e-01,  1.8852e-01,  4.8896e-01,  2.7387e-01,
         7.4967e-02,  3.2386e-01, -2.4617e-01, -3.5661e-02,  1.8758e-01,
        -9.6722e-03, -4.3909e-01, -6.9392e-02,  1.0589e-01, -1.1004e-01,
         3.3488e-01,  2.7658e-01, -5.1590e-02, -5.9720e-02, -5.7825e-02,
         1.5805e-01, -3.0288e-01, -5.4331e-01, -1.8167e-01,  6.9206e-01,
        -7.4094e-01, -6.2739e-02, -1.6180e-01, -3.7966e-01, -4.1367e-01,
        -1.3608e-01, -2.4185e-01, -9.3579e-02,  4.0096e-01, -6.9622e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0669e-02,  1.5552e+00,  4.1694e-03,  4.5773e-03, -2.6201e-03,
        -9.6722e-03, -4.8671e-04, -4.9374e-03, -1.1981e-03, -8.5055e-03,
        -5.4431e-03,  3.5839e-04, -5.6581e-03, -7.1518e-04,  2.1601e-04,
         1.2486e-03,  4.4857e-03, -6.9410e-03, -1.8018e-03, -3.0567e-04,
         6.5833e-03, -1.7376e-03,  9.7412e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9472e-01,  4.3404e+01,  1.1357e+00,  4.3202e-01, -2.0125e-01,
         9.7261e-01,  6.0421e-03, -1.3421e-01, -5.5716e-02,  1.1998e-02,
         1.2291e-02,  1.7488e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.3261e-01,  1.8753e+01, -3.1259e-01, -6.5865e-02,  2.0428e-02,
         2.8933e-02,  6.6624e-02, -2.1251e-02,  1.1670e-02, -3.5403e-02,
         7.5119e-02, -7.8864e-02, -1.5779e-02,  1.9102e-02,  8.4532e-03,
         5.1452e-02, -5.5615e-03, -1.1500e-04,  1.9110e-02, -1.1236e-01,
        -1.9807e-04,  2.2865e-02,  6.9374e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5691e-01, -7.1758e+01,  1.5016e-01, -1.2365e-01,  1.0134e-01,
         7.6773e-02, -6.2607e-02,  7.1968e-02,  7.1997e-02,  5.0968e-02,
        -3.6331e-02,  2.4545e-01,  1.8788e-02,  1.5222e-01,  5.1743e-02,
         9.6243e-02,  1.6192e-02, -8.5470e-02, -5.4102e-02, -7.7856e-02,
        -5.8143e-02,  8.8004e-02,  1.8705e-01, -9.4452e-03, -3.4292e-02,
         5.2748e-02,  4.1366e-02, -3.6948e-02,  7.0435e-02,  2.2123e-02,
        -6.3405e-02, -1.3729e-01, -2.2202e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1509e-01, -5.2891e+01,  8.5184e-02,  1.0753e-01, -1.1319e-01,
         2.0960e-02,  1.2052e-01,  1.9465e-01,  8.4243e-02, -4.0289e-03,
        -5.0911e-02,  1.2625e-01,  1.6596e-01,  1.8952e-01, -7.6757e-02,
         3.0107e-02,  1.0965e-02,  9.7020e-02,  4.2166e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4392e-02,  5.6397e+00, -1.4662e-02,  1.0065e-01,  1.1552e-03,
         4.4510e-02, -1.6315e-02, -9.0067e-03, -1.3322e-02, -3.7352e-03,
         8.9625e-04, -1.7102e-02,  2.6645e-02, -2.5922e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3730e+00, -3.1528e+02,  2.6156e+00,  1.7040e+00,  1.4087e-01,
        -6.5003e-01, -6.7908e-01, -9.2412e-02, -4.5122e-01, -1.8654e-01,
        -6.8025e-01, -9.2095e-02, -1.3029e+00, -1.0504e+00,  2.5343e-01,
         3.8830e-01, -1.1024e+00, -1.5724e-01, -3.1578e-01,  3.3471e-01,
        -6.4076e-01, -2.4996e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3758e-05,  2.8535e+00,  6.0646e-03, -1.8053e-04, -5.4859e-04,
         3.3723e-03,  5.6289e-03, -9.7028e-03, -2.9700e-03,  1.6108e-03,
        -6.2843e-04,  1.5370e-02,  1.9571e-02,  6.7238e-04, -1.0686e-02,
        -4.3517e-03,  8.9962e-03,  9.2128e-03,  3.8086e-04,  3.8955e-03,
        -9.8218e-04,  2.5926e-03,  2.4172e-03,  5.1397e-03, -3.6435e-03,
         1.6833e-03,  3.9804e-03, -3.9002e-03,  7.7377e-04,  1.2140e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3168e+00, -1.6963e+02,  1.7379e+00,  2.2893e+00, -5.3389e-01,
        -3.6740e-01,  5.1027e-02,  1.2417e+00,  3.1663e-02,  3.8662e-01,
         5.0069e-01,  3.9940e-01,  7.2796e-01,  4.7807e-01,  2.8794e-01,
         1.8565e-01,  1.1686e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0474e-01,  2.2312e+01,  1.3360e-02,  6.4504e-02, -5.3204e-04,
         3.3909e-02,  2.4851e-02,  6.9015e-02, -6.3569e-03,  4.7393e-02,
        -1.3421e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2176e+00, -6.4745e+01, -2.3763e-01, -1.0522e-02,  3.3611e-02,
         8.2601e-02,  5.0636e-02,  1.0875e-01,  2.9013e-01, -7.9329e-02,
         1.8765e-01,  1.2405e-03,  2.7575e-01,  2.2002e-01, -6.2093e-02,
         1.4024e-01,  4.2429e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6015e-01,  1.4098e+01,  1.6376e-01, -6.3002e-03,  2.9979e-02,
        -2.3430e-02,  5.2684e-02,  8.4798e-03,  8.5015e-03, -2.5983e-03,
         3.3767e-02,  2.4374e-02,  2.6360e-02, -5.6263e-02,  7.5913e-02,
        -1.6313e-02,  3.0223e-03,  4.1560e-02,  1.4725e-02,  4.6526e-02,
        -2.1616e-02,  1.5418e-02,  6.6265e-02,  1.5290e-02,  4.6441e-03,
         6.1799e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8153e-01,  1.4247e+02, -9.3521e-01, -1.7326e-01,  6.2575e-02,
         4.7525e-02,  2.7889e-01,  5.4370e-01,  3.6627e-01,  3.1641e-02,
         7.7554e-02,  3.6435e-01, -5.1868e-02, -2.7464e-01,  5.8523e-03,
        -8.2522e-02, -1.2039e-01,  5.6270e-02,  1.5789e-01,  8.4718e-02,
        -2.2931e-01,  5.6974e-01, -3.6880e-02, -1.9509e-01, -2.9586e-01,
        -1.0559e-01,  2.6709e-01, -4.4449e-01, -4.9434e-01, -2.2624e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5661e-01,  1.7732e+02, -2.1927e-01, -2.5357e-01,  6.3767e-01,
        -9.2142e-02,  7.0540e-01, -3.4441e-01,  1.6514e-01,  7.8083e-01,
        -3.5256e-01,  7.9455e-01, -4.5578e-01, -2.1582e-01, -3.5573e-01,
        -5.6401e-02, -4.8554e-01, -8.3562e-02, -1.2712e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-1.2174e+00, -1.5205e+02,  6.1703e-02,  8.5080e-01,  4.1122e-01,
         6.5210e-02, -2.3082e+00,  3.8939e-01,  4.5788e-01, -2.0178e-01,
        -4.9903e-01,  1.0814e-01, -3.3159e-02,  2.1841e-01, -2.0202e-01,
        -5.5765e-02,  1.2078e-01,  6.6453e-02, -2.4191e-01,  9.3054e-03,
        -6.9306e-02,  6.3845e-01,  1.8561e-01, -1.6662e-01,  3.2317e-01,
        -1.1423e-01,  2.5561e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0665e-01,  1.2471e+02, -2.6217e-01, -1.7209e+00, -5.0489e-01,
        -5.1462e-01, -1.8963e-01,  4.5613e-02,  2.9495e-01, -1.6391e-01,
        -1.4709e-01, -4.1720e-01,  3.0517e-01, -9.5904e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6333e-01,  1.3510e+02, -6.1676e-02, -1.6428e-01,  2.3972e-01,
         1.1792e-01,  3.6851e-01,  7.4392e-01,  2.1983e-01,  2.4823e-01,
         1.3488e-01,  1.0567e-01,  2.7799e-01,  2.9807e-01,  2.6815e-01,
         4.8590e-01,  2.3479e-01, -3.0167e-01,  2.2690e-01,  6.8401e-01,
        -3.1638e-01, -3.5010e-01,  3.4042e-02,  7.4597e-02, -5.8966e-02,
        -6.0447e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3008e+00,  7.9241e+01,  4.4404e-01, -3.2120e-01, -2.4532e-01,
        -7.6925e-01,  2.0697e-01,  2.4541e-02,  1.4199e-01, -4.9291e-01,
         8.4233e-03,  1.3665e-01, -2.7354e-02, -9.8530e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6777e-02,  3.1959e+01, -2.4505e-02,  2.1767e-02, -6.4412e-02,
        -9.9968e-03, -1.7344e-02,  1.6163e-03, -1.4026e-02, -5.7248e-02,
         9.5386e-03, -3.2575e-02,  6.6536e-03, -5.4524e-02,  3.6843e-02,
         3.1530e-02, -1.1695e-02, -4.8948e-02,  1.5349e-02, -1.0392e-01,
        -1.8541e-02,  5.1088e-03, -3.6159e-02, -8.4940e-02,  4.2928e-02,
        -3.0067e-02,  8.7286e-03, -4.6097e-02, -3.7578e-03, -3.8098e-02,
         2.1756e-03, -2.6324e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6835e-02,  2.3395e+01,  7.6617e-02, -1.4866e-01,  9.5130e-02,
        -1.4472e-02, -1.2240e-01, -7.2525e-02, -8.1130e-03, -3.4899e-05,
        -7.0121e-02, -1.0350e-02, -1.2583e-03, -2.0390e-02,  1.8846e-02,
        -6.9829e-02, -3.9455e-02,  2.0414e-02, -3.6161e-02,  1.0640e-03,
         2.5870e-04,  1.9170e-02, -2.5131e-02,  1.1066e-03,  5.5446e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7474e+00,  7.4401e+01,  1.2652e+00, -1.4359e-02,  3.3109e-01,
         4.9281e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4378e-01,  1.0795e+01, -2.0105e-02,  2.8927e-02,  3.0735e-02,
        -2.8273e-02, -2.3180e-02, -1.8614e-02, -1.4141e-02,  1.7514e-02,
        -5.8172e-03,  1.4855e-02,  1.7280e-02, -2.4536e-03, -9.3637e-03,
         6.2541e-03,  8.5905e-03,  5.8276e-03, -2.7959e-03,  1.4599e-02,
        -1.0234e-02,  7.6580e-04, -2.4838e-03, -2.0057e-03, -1.2841e-02,
         1.1332e-02,  8.6332e-03,  1.1037e-02, -6.5409e-03, -1.5831e-03,
         6.2992e-03, -1.2202e-04, -6.3811e-03, -1.5119e-03,  1.0174e-02,
         7.7888e-02,  1.1408e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6305e-02, -8.3032e+00,  3.6245e-02, -1.7607e-02,  2.0477e-02,
        -1.2615e-02,  5.1311e-03,  7.1522e-03, -6.2421e-03,  4.8030e-03,
         7.3550e-03, -2.6878e-02,  6.0112e-03,  2.7279e-03,  2.1729e-02,
         3.0277e-02,  1.8379e-02,  4.3613e-03, -3.5086e-03, -4.2227e-02,
         1.0350e-01,  7.2604e-03,  3.1055e-02,  1.0204e-02, -3.6918e-06,
        -1.0457e-02,  2.8268e-02,  6.2686e-03, -1.5130e-03, -2.0920e-02,
         6.0178e-03,  5.0909e-03,  5.4834e-03,  2.0583e-03, -1.4791e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2610e-01, -1.6403e+01,  1.0268e+00,  7.8876e-02,  9.6964e-02,
        -4.5659e-03,  3.6823e-02,  1.1810e-02, -8.9908e-03,  1.9222e-02,
         1.1260e-02,  7.0713e-02,  4.4404e-02,  1.1315e-02,  1.3746e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0664e-01, -3.1075e+02, -4.4809e-02,  1.6312e+00, -1.1381e-01,
         3.9265e-01, -3.1836e-01, -3.4477e-01, -6.1197e-01, -8.1704e-01,
        -5.4688e-01,  2.5652e-02, -7.0873e-01,  5.8256e-01, -6.8755e-01,
         1.3495e-01, -4.5549e-01, -4.0996e-02, -1.8516e+00, -4.3162e-01,
         2.3886e-01, -3.4102e-01, -7.0625e-01, -4.4273e-01,  1.1226e-01,
        -4.7855e-01, -1.9954e-01, -3.5656e-01, -9.2315e-02, -4.7209e-01,
        -5.4136e-01,  1.5518e-01, -9.9963e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8103e-01, -6.0543e+00,  3.3614e-02, -8.2008e-03, -1.8971e-03,
         3.4222e-02, -5.1119e-03, -7.1886e-03, -1.1858e-02, -4.2647e-03,
         6.3382e-03, -2.2676e-02, -3.9442e-04,  1.4307e-03, -1.1919e-02,
        -1.4093e-02, -2.2341e-02, -4.0533e-05, -5.9700e-03,  6.0197e-05,
         1.3760e-03, -2.0304e-03, -1.1471e-02, -4.5675e-03, -1.2248e-02,
         1.3560e-02,  2.3044e-03, -2.9415e-02,  3.6360e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-8.0545e-01,  2.2853e+02, -7.7502e-01, -4.6155e-01, -1.0704e+00,
         5.2064e-01,  4.0138e-01, -3.9674e-01, -4.5185e-01, -7.9557e-01,
        -2.3005e-01,  4.8712e-01,  4.7906e-02, -9.5538e-02, -5.2217e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2445e+00, -2.9102e+02, -3.3995e+00, -6.8175e-01,  2.5345e-01,
        -2.0721e-01, -4.1330e-01,  1.1941e+00, -5.0618e-01, -3.0377e-01,
        -2.4127e-02, -1.6338e-01, -3.1128e-01,  2.2528e-03,  2.2016e-01,
        -9.4308e-01,  2.3825e-02, -6.2406e-01, -2.2312e-01,  4.9638e-02,
        -2.6680e-01,  3.1868e-01, -8.8889e-02, -2.0201e-01, -1.4890e-01,
        -2.1256e-01, -1.0450e-01, -4.7842e-01,  3.0073e-01, -8.3968e-02,
        -2.6970e-01, -1.7660e-02,  7.0249e-02, -1.4048e-02, -2.6446e-01,
        -3.5938e-01, -3.4775e-01,  1.3421e-01, -3.0581e-02,  2.8513e-01,
        -2.0523e-01, -4.3039e-01,  5.8051e-01, -1.5866e-01, -1.6748e-01,
        -2.2890e-01,  1.1689e-01, -1.4133e-01,  1.1298e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2444e-03, -3.8988e+00,  2.5276e-02, -9.2058e-03,  1.4968e-02,
        -3.6091e-02,  1.5104e-02,  7.8073e-03, -5.6459e-03, -7.6076e-03,
        -1.1681e-02, -2.3704e-03,  1.8071e-04, -6.8407e-03, -3.0264e-03,
        -9.6619e-03, -1.8650e-02,  8.7914e-05,  1.7221e-03, -3.3975e-03,
         1.0818e-03, -2.5250e-03, -6.6319e-03, -4.0770e-03, -4.7617e-03,
         3.1258e-03,  5.8684e-03, -2.9134e-02, -2.1502e-03,  8.8092e-03,
        -4.6746e-03,  4.2041e-04, -4.5159e-03, -7.0563e-04,  1.4520e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3212e-02,  3.6748e+00, -4.5151e-02, -2.6955e-02, -4.6825e-03,
         4.5955e-03, -1.5077e-02,  4.2744e-03, -1.0354e-02, -4.7691e-03,
        -2.8058e-03, -5.2240e-03, -6.0173e-04, -3.9858e-03,  1.6061e-03,
        -1.3587e-02,  4.8364e-03, -1.3406e-02,  4.9048e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9163e+00, -2.1855e+02,  2.2920e-01, -1.5677e-01,  5.2759e-01,
         6.5452e-01,  5.8347e-01, -1.3725e-01,  1.4725e+00,  1.6457e-01,
        -4.7462e-01, -7.1260e-02, -7.1323e-01,  6.5833e-01,  2.0672e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7494e+00, -2.3634e+02,  1.2075e+00, -3.1048e-01, -2.6866e-01,
        -1.0847e-01, -1.1929e-01, -6.5832e-03, -1.1394e-01, -9.4001e-01,
        -2.8602e-01,  5.2025e-02, -3.9631e-01,  1.8647e-01, -3.1035e-01,
        -6.2986e-02, -2.5073e-02,  6.8003e-01, -1.6699e-01, -1.3269e+00,
        -6.5274e-01,  9.3433e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4027e-02, -2.6850e+00,  1.8751e-03, -6.3220e-03, -3.4999e-03,
        -5.5454e-03,  6.7996e-03,  2.0289e-03,  1.4395e-02,  1.0728e-03,
         2.0836e-03, -5.9081e-03,  8.7470e-03,  6.3918e-03, -3.6267e-05,
         1.4514e-03,  1.2503e-02,  2.8311e-03,  4.2143e-03,  4.0790e-03,
         1.4482e-04,  1.6651e-03,  5.1698e-03,  1.3297e-03, -4.9124e-03,
         7.8409e-04,  3.3759e-05,  3.5301e-03,  1.4424e-03,  4.9603e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6870e-02,  1.2356e+02, -9.3327e-01, -4.7482e-01,  1.9884e-01,
        -6.4606e-01, -3.0423e-01, -1.1956e-01, -1.0205e-01, -4.5882e-02,
        -1.2870e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5804e-01,  3.6861e+01, -4.9861e-02,  4.0596e-02,  2.8148e-02,
         7.8512e-04,  4.8360e-02,  5.1982e-02,  5.2605e-02, -1.8545e-01,
        -1.2583e-02,  9.7068e-02,  1.8591e-02, -1.6392e-03, -2.6525e-02,
         5.6035e-02, -1.0228e-01,  2.1112e-02, -7.0810e-02,  4.2562e-02,
        -5.0840e-02, -2.2343e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8427e+00, -2.8425e+02, -1.8040e+00,  3.2508e-02, -2.5640e-01,
         1.4030e+00, -1.5415e-01,  4.0121e-01,  6.2488e-01, -1.1514e-01,
        -6.4682e-01, -5.0333e-01, -1.8410e-01,  5.2224e-01, -3.6477e-02,
         6.1983e-01, -5.8059e-01,  2.6290e-01,  6.2537e-01, -1.5375e-01,
        -4.9156e-02,  6.1778e-01, -5.8967e-01,  5.5785e-02, -2.8832e-01,
        -1.1275e-01,  7.4143e-03, -2.1712e-01,  2.0362e+00,  2.2312e-02,
        -7.2027e-01, -1.2692e-01,  5.3764e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0092e-01, -2.6278e+01,  2.0798e-02,  3.7431e-03, -3.3029e-02,
        -8.9397e-02, -8.5691e-02,  1.6597e-01, -8.0426e-02,  3.9516e-03,
        -1.5499e-03, -4.2677e-02, -2.5400e-02,  4.3440e-03, -1.5779e-02,
        -2.9347e-01, -7.7364e-02,  3.5907e-03, -2.7183e-02, -5.5460e-03,
         1.4908e-02,  7.6382e-02, -6.0819e-03, -5.6463e-02,  9.4706e-02,
        -1.4332e-02, -6.4260e-02, -6.3497e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4715e+00, -1.0491e+02,  5.2581e-01, -2.1038e-01,  1.9596e-01,
        -2.0973e-01, -1.3266e-01,  2.5309e-01,  9.9383e-01, -2.0131e-02,
        -6.4401e-02,  5.3384e-02,  2.6589e-01,  5.6465e-02,  1.2546e-01,
        -7.8529e-02,  3.0054e-01,  2.0503e-01,  1.7850e-01,  9.3488e-01,
         3.6573e-02,  1.1914e-03,  1.7521e-02,  9.2961e-01,  5.3702e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.0553e-01,  3.4836e+01,  3.0757e-01, -1.8502e-01, -1.9528e-02,
         1.0208e-01, -1.1798e-01, -5.8565e-03, -3.0685e-02, -1.0790e-02,
         1.0133e-01,  9.1098e-02,  1.7080e-02, -1.1383e-01,  5.4036e-02,
        -1.8656e-01, -3.0212e-02, -1.6896e-02,  1.2642e-02, -3.6499e-02,
         5.8306e-02,  1.3774e-02, -8.8800e-02, -2.9395e-02,  9.7061e-02,
         4.5490e-03,  3.2588e-02, -1.3703e-01,  4.9490e-03,  6.6725e-02,
        -7.2011e-02, -2.4882e-02, -5.9205e-02, -3.7681e-03, -3.1489e-02,
         4.0153e-02, -2.4422e-02, -1.7244e-02,  2.2409e-02, -1.0885e-01,
         1.0395e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9150e-03,  3.5739e+00, -2.2264e-03,  1.5071e-03, -7.3561e-03,
        -4.6329e-03, -5.0904e-03,  2.9170e-03, -7.7154e-03, -1.0953e-03,
         1.5652e-03, -1.5595e-02,  2.2784e-02, -4.3906e-03,  1.1812e-02,
         2.1409e-03, -4.5875e-03, -1.4596e-03,  9.5639e-05,  1.1129e-02,
        -1.3638e-03, -2.6163e-02, -2.1885e-03,  1.1738e-02,  3.0848e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2937e+00, -2.6024e+02, -3.1129e+00,  8.3024e-01, -5.5794e-01,
        -1.1296e-01,  5.8572e-01,  2.1581e-01,  2.2179e-01, -5.6451e-01,
         2.1243e-01, -2.8264e-01, -4.9089e-01,  2.7913e-01, -1.7717e-02,
         7.2882e-02,  6.9339e-01,  1.4753e-02,  1.7735e-01, -3.1141e-01,
        -1.4449e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1386e-03,  3.1156e+00,  6.4600e-02, -1.8863e-02,  1.9098e-02,
         1.6357e-02,  1.7696e-03,  6.4776e-03, -9.7497e-03, -2.1000e-02,
         1.6234e-02, -1.1133e-02,  3.6660e-03, -5.2753e-03, -6.7415e-03,
        -7.6998e-04, -1.6805e-02,  1.0834e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6946e-01,  3.6935e+01,  8.3739e-01, -1.6932e-01,  1.3079e-01,
        -1.1807e-02,  1.6522e-01, -8.8024e-03,  5.3691e-02,  5.0249e-02,
        -3.3884e-02, -5.1575e-03, -2.2041e-02,  9.7700e-02, -2.8857e-02,
         8.3704e-02,  3.7368e-01, -3.1377e-02, -1.1087e-02, -1.4143e-02,
         4.6372e-02, -4.6654e-03, -5.0452e-02,  4.3595e-02, -4.2187e-03,
         9.6086e-03,  3.2601e-02, -4.1063e-02,  1.5578e-03, -3.5112e-02,
        -3.5069e-03,  1.1687e-03, -5.3305e-02,  2.7179e-02, -1.0354e-02,
         4.7675e-02, -3.4332e-02, -2.6592e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2061e+00, -2.2730e+02,  4.3148e+00,  1.6123e-01, -1.4369e+00,
        -4.4375e-01,  1.6138e+00,  1.7888e-01, -1.1959e+00,  5.0281e-02,
        -5.3847e-01,  1.5577e-01, -1.3181e-01,  6.4877e-01, -4.1830e-01,
         1.5077e-01, -4.2208e-01, -8.2972e-01,  1.5843e-01, -1.0238e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2887e+00,  1.6547e+02, -1.3664e+00,  1.2415e+00, -4.8176e-02,
        -6.2673e-01,  7.3106e-01,  1.0130e-01,  4.6648e-01,  5.2218e-01,
         1.1115e+00, -7.7957e-02,  6.5871e-01,  1.5259e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1991e+00,  6.2439e+01, -1.2405e-01,  1.3426e-01,  1.9836e-03,
        -1.2673e-03, -1.2054e-01,  3.8186e-03, -8.6005e-02,  4.1821e-02,
         8.5367e-02,  1.6527e-01,  7.1617e-02,  8.5564e-03, -4.3813e-02,
        -1.0993e-01, -8.1832e-02, -1.5141e-02,  3.9183e-02,  7.4341e-03,
        -1.3894e-01,  1.7987e-01,  2.2601e-02,  9.3936e-02, -1.3435e-02,
        -5.6289e-02, -3.5332e-02, -3.6747e-02, -7.4599e-02, -9.6483e-02,
        -1.2809e-01,  1.4974e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1344e-01,  1.6239e+02, -5.6433e-01, -9.9431e-01,  5.2694e-01,
        -2.9852e-01, -8.4236e-01, -3.0629e-01, -2.4999e-01, -5.1878e-01,
         2.0253e-01,  4.6252e-01, -1.5920e-01, -6.9366e-01,  4.1534e-01,
         1.0948e-01, -1.0279e-01, -1.4409e+00, -8.7765e-01, -1.3036e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5813e-01,  1.2130e+02,  1.8256e+00, -2.4525e-01,  4.6795e-01,
         1.1550e-01,  1.4561e-01, -9.1792e-02, -7.8842e-02,  3.8171e-02,
        -2.3466e-01,  6.6608e-02, -2.6285e-01, -1.1686e-01, -3.1101e-01,
        -2.2898e-01, -1.4191e-01,  2.5264e-02, -2.6442e-01, -6.1007e-03,
        -2.9508e-01,  2.0405e-02, -2.4751e-02,  2.2559e-03,  8.8570e-02,
        -7.1124e-02, -4.5487e-01,  8.2470e-02, -8.1657e-01, -1.9179e-01,
         1.5767e-02,  1.9895e-01, -5.0271e-02,  4.7575e-01,  3.3394e-02,
        -1.0665e-01, -2.1829e-01, -2.4567e-01, -3.2465e-01, -8.0815e-03,
        -2.4668e-01, -6.5436e-02,  1.7479e-01,  1.9899e-01,  3.7303e-01,
        -1.1160e-01, -8.8150e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4278e-01,  2.7259e+01, -6.8686e-02, -9.1654e-02, -1.2068e-02,
        -3.3045e-02, -1.3144e-01,  1.1260e-01,  2.2001e-01, -3.0786e-02,
        -4.1944e-02, -6.1469e-02, -2.1653e-04, -1.5142e-02,  7.9570e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4265e-02,  6.4509e+00, -5.9033e-02,  2.0702e-03, -2.6439e-02,
        -1.4306e-02,  4.0088e-03, -2.1234e-02,  5.5005e-03,  1.7359e-03,
        -2.2473e-02, -2.2325e-03,  1.0243e-05,  1.5048e-02, -2.5447e-02,
        -3.3666e-02, -2.2580e-02, -5.6575e-03, -8.7230e-04, -2.7290e-03,
        -3.9385e-03, -1.6462e-02, -3.3433e-03,  2.6164e-03,  3.4179e-03,
        -1.7974e-02,  1.7390e-03,  7.7121e-02, -9.4711e-03, -1.6630e-03,
         7.2320e-03, -1.9645e-03, -4.6897e-03, -1.9290e-03, -4.2513e-03,
        -8.5711e-03, -9.2546e-03, -8.0385e-03, -1.8356e-02, -7.4024e-03,
        -2.1620e-03,  1.1466e-02, -5.8334e-03, -8.8425e-03,  5.8546e-03,
        -3.3091e-03, -3.0852e-02, -2.6019e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 1.5686e+00, -2.0617e+02, -1.9357e+00,  3.1242e-01, -2.4662e-01,
         1.4895e+00,  7.3470e-01, -4.1838e-01, -1.9135e-01,  1.0994e-01,
        -1.6121e-01,  8.2336e-01, -3.6382e-01, -1.9041e-01,  3.9126e-02,
        -2.0131e-01,  1.7284e-01, -1.1581e-01, -2.8184e-01,  3.3248e-01,
        -1.1387e-01,  6.5533e-02, -3.9580e-01,  1.6915e-01, -9.0374e-01,
        -1.0866e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1403e-03, -1.5702e+01, -5.7291e-02, -6.3304e-02, -4.4968e-02,
         2.8249e-02, -3.7257e-02, -1.3178e-01, -6.1989e-02, -4.3053e-02,
        -1.0794e-01,  7.0690e-03,  2.1215e-03, -5.8955e-02, -2.5834e-02,
        -1.5914e-02, -3.2743e-02, -1.2110e-02, -6.8185e-03,  6.3074e-03,
         2.1126e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8725e-03,  5.9513e-01,  2.8368e-03, -2.9427e-03, -1.9019e-03,
        -2.3858e-03,  8.0615e-05,  1.2711e-03,  1.7912e-04,  2.9271e-04,
        -5.9122e-04,  9.7947e-04,  1.1037e-03,  8.1559e-04,  2.2939e-03,
        -1.0031e-03,  4.7857e-05,  5.2083e-04,  1.2842e-04,  6.4563e-05,
         5.5522e-04, -1.1794e-03, -1.9244e-03,  8.0368e-04,  1.1737e-03,
         4.9059e-05, -1.2464e-03, -1.3773e-04,  1.1953e-04, -2.8648e-05,
        -7.5984e-04, -4.4218e-04,  1.7498e-03,  6.6481e-06, -8.5625e-04,
        -1.0385e-03,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0082e-03,  2.7463e+00,  3.1041e-03, -3.2239e-03,  1.0052e-02,
        -1.0573e-02,  3.2943e-04,  4.5440e-03, -5.8710e-03,  1.0997e-02,
         8.6694e-03, -9.2438e-04,  3.6412e-03, -1.7332e-03,  1.2733e-03,
         8.0495e-03, -3.5587e-03,  1.0648e-02, -1.8791e-02,  1.3405e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6650e-01,  1.6699e+02, -8.3447e-01,  7.4585e-01,  1.3217e-01,
         1.5373e-01, -1.4912e-01,  2.4857e-01,  1.2372e-01, -1.5216e-01,
         4.5313e-01, -4.5105e-01,  4.5596e-01, -1.7337e-01,  4.5877e-01,
        -3.1726e-01, -3.3101e-01, -1.4790e-01,  9.7217e-02, -1.8078e-01,
         3.6919e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2860e+00,  1.8897e+02,  1.0201e+00, -1.3814e+00,  3.6997e-01,
        -5.6882e-02, -2.4921e-02,  1.0158e-01,  1.0373e+00,  9.3302e-01,
        -1.5451e-01, -6.8073e-01,  1.5187e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6428e-02,  3.4322e+00,  3.9515e-02,  1.9171e-03,  2.3077e-02,
        -1.6032e-03,  1.3901e-02,  3.2167e-04,  3.9294e-04, -1.0693e-02,
         2.7041e-03, -1.7061e-02,  2.2784e-03, -4.9776e-03,  2.5586e-03,
        -1.7978e-03,  3.9759e-03,  1.0175e-02,  1.6095e-04, -2.5525e-03,
         4.3892e-03,  8.8949e-03, -3.6463e-03, -7.7705e-03,  2.2134e-03,
        -4.5214e-03, -4.1389e-03, -4.8033e-03, -3.0379e-03, -4.5337e-03,
        -4.1783e-02,  2.2788e-03, -4.4570e-04, -8.1001e-03, -3.1106e-04,
        -3.6696e-02,  1.0976e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6363e-02, -1.1696e+01, -2.2416e-01, -2.9110e-02,  2.9437e-02,
        -6.7214e-03, -1.6194e-02, -4.4206e-02, -1.2786e-02, -1.5864e-02,
        -3.3107e-02, -1.3322e-02,  1.6723e-02, -1.6976e-02, -9.1532e-03,
        -1.8711e-02, -2.2203e-02, -1.4035e-02, -7.1399e-03,  1.1194e-02,
        -1.8189e-02, -3.3906e-03, -3.4630e-02,  7.1423e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8223e-01,  9.6897e+01,  1.0571e+00,  4.2138e-01, -1.4204e-01,
        -1.3156e-02,  1.0491e-01,  4.1386e-02,  1.0320e-02, -1.0616e-01,
        -5.0683e-01, -4.4548e-01, -6.3155e-01,  2.8444e-01,  2.4177e-02,
         6.6193e-03,  1.5923e-02,  6.9858e-02, -8.2896e-02,  2.2085e-01,
         4.6493e-03, -3.5370e-02, -1.0879e-01, -1.5519e-01,  1.7054e-01,
        -1.5625e-02, -1.4400e-01, -2.2379e-01, -6.7811e-03,  8.2704e-02,
        -1.4142e-01, -1.7598e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0370e-02,  3.6737e+00,  2.3530e-02, -1.0387e-02,  9.7222e-03,
        -2.5654e-02, -1.8096e-03, -4.1770e-03, -8.4399e-03,  2.1610e-03,
        -6.6891e-04, -1.3567e-03, -4.1323e-05,  7.3309e-03,  4.6384e-04,
         1.2455e-03,  6.4709e-03,  2.0279e-03,  8.9682e-03,  5.4761e-03,
         1.7269e-02, -2.4330e-03, -1.9618e-03,  1.7973e-04,  1.9870e-03,
        -9.0126e-03,  1.6246e-03,  3.3144e-03,  6.2198e-03, -8.0608e-03,
         1.0243e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2278e-02, -3.0090e+00, -2.8348e-02, -2.5768e-03, -5.2612e-04,
         7.5598e-03, -1.0187e-02,  4.7246e-02,  2.1103e-02,  2.6389e-03,
         5.0279e-03, -4.2881e-03, -1.1856e-03,  1.6746e-02, -8.9657e-04,
        -3.9117e-03,  6.1449e-03, -8.1516e-03, -1.6481e-03, -1.7400e-02,
         4.6787e-03,  2.4362e-03,  1.6690e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8244e-01,  1.0605e+02, -2.6669e-01, -3.2292e-01,  1.1260e-01,
        -3.6500e-02, -7.0266e-01, -2.5333e-01,  9.1833e-02,  5.9915e-02,
         3.4640e-01, -2.3254e-01,  2.2696e-03,  1.0552e-01, -6.9950e-02,
         1.5075e-01, -1.0008e-01, -1.8074e-01,  1.6925e-02, -1.1755e-01,
        -2.8908e-01, -5.0703e-02,  1.0923e-01,  1.6810e-01,  2.8499e-02,
         9.4144e-02,  6.3256e-02,  1.7398e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 7.7240e-01, -2.6844e+02,  8.5953e-01,  8.8053e-01,  5.6531e-01,
        -1.5336e-01,  2.8505e-01, -3.5683e-01,  5.1590e-01,  7.9678e-01,
         4.1375e-02,  6.3401e-02, -4.4896e-02,  6.1964e-02,  3.2319e-01,
         1.3352e-01,  1.1307e-01,  4.6213e-01, -1.2541e-01,  8.7225e-01,
         1.5096e-01,  8.2972e-02,  4.7444e-01,  4.1851e-01, -2.0105e-02,
        -1.7740e-02, -1.7439e-01,  8.7830e-02, -5.2393e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0977e-01,  3.3287e+01,  1.1421e-01, -3.3301e-02,  1.6407e-01,
        -5.7156e-02,  5.3685e-02,  5.6894e-02,  5.2764e-02,  1.0892e-02,
        -3.6402e-02, -3.8723e-02,  1.6114e-02,  2.1281e-02,  5.3903e-03,
        -6.2403e-02,  1.3157e-01, -4.4986e-02,  2.2296e-01,  5.1682e-02,
        -7.9064e-03, -1.4910e-02,  3.6454e-03,  3.0743e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3845e-02,  5.3719e+00,  1.6716e-02,  8.6135e-04,  1.9740e-02,
         4.0859e-02, -1.4806e-02,  7.5973e-03,  1.1872e-02, -3.0863e-03,
         2.3126e-04,  2.4552e-02,  6.4191e-03,  1.6415e-02, -5.6850e-03,
        -2.2123e-03,  8.5323e-03, -9.5834e-03,  1.0828e-02,  4.6935e-03,
        -2.8385e-02, -8.4907e-03, -5.0841e-05,  8.4180e-03,  6.6762e-04,
        -6.4710e-03, -1.1578e-02, -1.9177e-04, -9.4627e-06,  1.0453e-02,
         7.0912e-03,  2.5922e-03, -6.1515e-03,  1.1133e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8375e-04,  6.3410e-01, -5.0425e-03, -1.7511e-03,  1.8266e-04,
        -1.8504e-04,  2.5846e-03, -2.5237e-04, -1.0336e-03,  2.2978e-04,
        -1.4577e-03,  1.9969e-04,  2.0580e-04, -1.5124e-04, -2.9037e-04,
         3.2351e-04, -1.1443e-04,  7.9502e-04, -9.1821e-04, -3.2434e-04,
        -4.4455e-04, -5.8452e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9640e+00,  1.3601e+02,  7.5958e-01,  6.5505e-01,  3.5702e-01,
        -3.0746e-01,  2.9244e-01, -9.9977e-01,  3.7773e-01,  3.8852e-01,
         4.0630e-01,  9.3689e-03, -7.5942e-03, -2.7481e-01,  2.1299e-01,
         1.9302e-01,  3.6584e-01, -1.7385e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2671e-01,  3.3180e+01,  2.7782e-01, -1.0647e-01, -7.0131e-02,
        -1.4968e-01,  5.8556e-02,  5.5292e-02,  3.6842e-02,  3.3796e-02,
        -3.1579e-02, -3.9040e-02,  9.0599e-02,  1.7963e-01, -5.5767e-02,
         5.9749e-03, -2.1790e-02, -1.3638e-01, -3.0025e-02,  2.7942e-03,
        -1.1847e-02, -1.0493e-01, -1.5496e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2084e-03,  1.0699e+00,  4.5955e-04,  8.1037e-03,  7.2700e-03,
         1.4100e-03,  2.7228e-03,  1.7586e-03,  1.3172e-03, -1.5300e-04,
         2.1182e-03,  1.9386e-03,  2.1013e-03,  8.0880e-04,  2.5549e-03,
         3.1169e-03, -2.7415e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7864e+00, -2.5716e+02,  2.2829e+00,  3.7762e-01, -8.1759e-01,
        -1.0704e+00, -9.2042e-01, -2.1009e-02,  1.1453e+00, -5.4206e-01,
        -2.7226e-01,  1.3625e+00, -4.1309e-01, -6.2627e-01, -3.2953e-01,
         4.2253e-01, -2.9026e-01,  1.3217e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8669e-02,  3.8539e+00,  1.1794e-02, -4.4598e-03,  1.2516e-02,
        -3.8349e-03, -7.0568e-04, -6.8412e-04, -3.8342e-03,  1.0812e-01,
        -3.8557e-03, -1.2866e-02, -3.4613e-03, -7.3103e-03,  5.9137e-03,
        -6.1033e-04, -1.1549e-02,  1.0002e-02,  7.3809e-03,  6.5845e-04,
        -1.9324e-03, -2.9749e-03,  5.5287e-03,  7.0691e-03, -6.7711e-04,
        -6.3728e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5325e-02,  5.1507e+00, -1.9644e-02,  7.8855e-03, -1.5842e-02,
        -1.5371e-02, -4.8189e-03, -1.1401e-02,  4.3133e-03, -1.0566e-02,
        -3.3143e-03,  6.5097e-03,  1.8352e-02, -2.3739e-02, -1.2575e-02,
         1.3333e-02, -5.4351e-03,  1.2109e-03,  5.5721e-03, -1.3273e-02,
        -4.7016e-03, -2.2743e-02, -7.8659e-03,  8.1727e-03, -1.8220e-02,
         9.0169e-03,  1.6772e-03,  6.6272e-03, -1.6702e-02,  2.2350e-02,
         2.9846e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2950e-01,  1.1832e+01,  2.1405e-02,  8.7999e-03, -1.8698e-02,
         4.9022e-02,  6.3017e-03,  6.7828e-03, -4.0923e-03,  1.0316e-02,
         2.8897e-02, -1.6921e-02, -2.8510e-02,  1.3210e-02,  4.1195e-02,
         3.0430e-03, -1.4483e-02, -5.4334e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0204,  0.6681, -0.0045, -0.0086, -0.0033, -0.0046, -0.0057, -0.0080,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-4.4945e-01,  5.6552e+01, -2.8745e-01, -1.5784e-01,  1.0645e-01,
        -5.9225e-02,  6.5782e-02, -2.2711e-02, -5.9753e-02,  9.4869e-02,
        -1.6889e-02,  2.0217e-02, -7.7164e-02, -4.0063e-02,  1.2260e-01,
        -1.8914e-02, -2.3104e-02, -1.4036e-01,  6.8078e-02,  1.3293e-02,
         4.4329e-03, -2.9725e-02, -1.8840e-01,  4.1947e-02,  1.0301e-01,
         3.2950e-03,  1.2972e-01,  2.1919e-01,  1.0658e-03,  4.1909e-02,
         3.0126e-02, -4.5392e-02,  4.4582e-02, -3.2736e-02, -6.0894e-02,
        -2.8653e-03, -1.9580e-02, -2.8395e-02, -5.8444e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9158e-01,  6.9292e+01, -7.6158e-01, -2.5372e-01, -8.6007e-02,
         5.3416e-02, -1.0215e-01,  1.7604e-01, -4.2437e-02, -9.1872e-02,
        -1.1227e-01,  8.2312e-02,  2.4014e-02, -8.1051e-02,  1.1038e-01,
        -1.4250e-03,  5.2057e-02,  9.0738e-02,  1.0699e-01,  5.5803e-02,
        -1.6192e-01, -4.8751e-02, -4.9950e-02,  1.9563e-02, -1.3188e-01,
         5.8433e-02, -1.0755e+00,  1.3925e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0618e+00,  1.4742e+02, -5.8361e-01,  5.1058e-01,  2.7408e-01,
        -6.4294e-02,  1.6749e-01,  3.7089e-01,  8.2723e-01,  1.9841e-02,
         1.1893e+00,  5.6586e-01, -1.0063e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7127e-01,  4.1317e+01, -1.4737e-02, -7.6623e-02, -7.2599e-02,
         4.0140e-02, -5.9503e-02,  1.9865e-02, -4.1566e-02, -4.2847e-02,
         1.7902e-01, -5.2996e-02, -5.6222e-02,  1.0072e-01,  1.7361e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3888e+00,  9.5178e+01, -3.3806e-01, -2.8447e-01,  8.5204e-01,
         1.8300e-01,  1.9433e-01, -1.5630e-01,  1.9609e-01,  3.7742e-01,
         2.5941e-02, -2.9223e-02,  3.5258e-01,  1.3089e-01,  2.4607e-01,
        -5.3083e-02, -1.1170e-01,  1.4157e-02, -5.4850e-02,  5.3481e-01,
         5.3116e-03,  1.8398e-02,  3.1302e-01, -3.0455e-02,  1.7942e-01,
         1.4833e-01, -3.7883e-03,  3.1636e-01, -1.1561e-01, -4.2643e-01,
        -2.8142e-01,  1.0106e-01,  1.4949e-01,  2.5471e-02,  2.2139e-05,
         9.8797e-02, -4.0056e-02, -4.6426e-02,  5.8584e-02, -1.0451e-02,
         1.4665e-01, -2.3680e-02, -6.2991e-02, -1.6788e-02, -1.9059e-01,
        -4.5720e-01,  3.7702e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3501e-01,  7.0005e+00,  6.0779e-02, -8.2695e-03,  4.0084e-03,
        -2.6974e-02, -7.7970e-04, -3.7499e-04, -3.5671e-02, -8.6201e-03,
        -8.4310e-03, -1.1963e-02, -3.6443e-03, -1.0100e-02,  1.3213e-03,
         1.6307e-02, -1.1992e-03, -5.3246e-03, -2.0299e-02, -3.8859e-03,
        -9.7339e-04,  6.0243e-03, -2.5960e-03, -1.6817e-02, -7.9613e-03,
        -1.3130e-02,  1.3944e-02, -1.9898e-03, -4.3500e-03,  1.4162e-02,
         4.8275e-04,  5.5331e-03,  1.0598e-02, -5.3791e-03, -7.0424e-03,
        -1.6119e-02,  2.9037e-03,  5.4082e-03, -7.1168e-03, -8.4424e-03,
        -7.1888e-02, -2.0789e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2030e-01, -1.3233e+02, -4.2210e-01,  1.7413e+00, -6.2997e-01,
        -2.8249e-01, -6.8337e-01,  1.3168e-01, -4.4316e-02, -9.0386e-01,
        -8.8959e-02, -7.6496e-01,  1.9537e-01, -1.0209e-01, -6.9952e-01,
        -1.2804e+00, -4.1752e-01,  1.0773e-01,  9.3595e-02,  4.7678e-02,
        -8.4654e-02, -5.9419e-02,  3.3809e-01, -1.1529e-01,  4.0180e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0441e-01,  3.2548e+01, -1.3934e-01,  1.3380e-03,  1.1475e-02,
        -2.2596e-02,  1.1347e-01, -6.6119e-03,  1.0570e-01, -7.2109e-03,
         7.4624e-03,  2.5048e-02,  3.2294e-02, -2.9072e-01,  5.4136e-02,
        -3.7391e-02,  3.4829e-02,  2.5036e-02, -2.9172e-02,  8.5395e-02,
         8.7141e-02,  2.9929e-02, -1.8949e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0065e+00,  1.0012e+02,  3.8642e-01,  4.6650e-02, -5.3163e-01,
        -5.0132e-01, -4.0667e-02, -5.7642e-02, -6.0698e-02,  3.8190e-02,
         1.2338e-01,  1.8135e-01,  1.1804e-01, -2.0776e-01, -2.2043e-02,
        -9.1258e-02, -1.0676e-01,  1.8263e-01,  1.6282e-02,  1.4665e-01,
        -1.1049e-01,  5.4382e-01,  2.1656e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2715e-01,  5.0296e+00,  8.8603e-02,  2.4102e-02, -2.2125e-02,
         3.1326e-03,  5.4032e-03, -5.0801e-03,  5.4712e-02,  2.6765e-03,
         1.6812e-02,  1.8269e-03,  1.6210e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4111e+00, -3.0275e+02,  1.7927e+00,  8.7160e-01, -5.0389e-02,
         2.5038e-01,  5.6874e-01,  7.4417e-02,  1.0196e-01, -9.1812e-01,
         7.9409e-01, -4.6882e-01,  8.2078e-01,  2.6398e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7446e-02,  1.0422e+02,  3.1047e-01, -2.7841e-01, -1.9266e-01,
         5.0030e-02, -4.0601e-01, -1.1219e-01,  1.3145e-01,  2.3041e-01,
        -1.3999e-01,  3.9982e-02, -3.7100e-01,  1.0147e-02, -5.0839e-02,
        -3.0604e-02, -3.9499e-01,  1.8007e-02, -4.8961e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-5.2883e-01, -2.6568e+02, -9.4089e-01,  6.4834e-01, -2.1204e-01,
         1.0047e+00, -1.2593e-01,  7.0804e-01,  4.7996e-01,  1.1345e+00,
        -5.0197e-02,  1.7280e-01,  4.2819e-01, -1.0203e+00,  6.9508e-02,
        -1.7495e-01,  4.5348e-01,  2.2106e-01, -1.7025e-01, -1.4132e+00,
         5.4092e-01,  4.0387e-01,  5.5097e-01, -1.9755e-01, -3.2471e-01,
         2.0732e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0295,  2.4321,  0.0319, -0.0144,  0.0218, -0.0124,  0.0077,  0.0081,
        -0.0072, -0.0073,  0.0119, -0.0060,  0.0079,  0.0045,  0.0053,  0.0120,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9259e-01,  1.8271e+01,  1.2043e-02,  4.0719e-01, -1.5202e-02,
         1.4793e-01,  5.5190e-02, -9.7300e-02,  6.9583e-02,  1.4241e-02,
         3.4485e-02, -1.6698e-02,  3.2748e-02,  2.8459e-02, -3.7577e-02,
         3.1762e-02, -1.6371e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6378e-01, -2.9663e+01,  1.9536e-01, -2.3404e-01, -1.4548e-01,
         3.5035e-01, -2.4836e-02,  1.7937e-01, -2.0112e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3512e-02,  1.5419e+00,  2.3429e-03,  8.2297e-03, -4.3210e-03,
        -1.5983e-03,  1.5393e-03,  4.5825e-03, -1.9934e-03,  2.2132e-04,
         7.6968e-04, -3.4766e-03,  1.9028e-03, -1.0194e-03, -1.6039e-03,
         2.8529e-03,  1.2433e-03,  6.7871e-04, -7.5598e-04,  1.8763e-03,
        -3.2058e-04,  1.5191e-03,  1.9609e-03,  8.5830e-04, -1.1751e-03,
        -2.0980e-03, -1.0038e-03, -2.3368e-03,  4.6672e-03, -8.7960e-05,
         1.1903e-03, -9.4524e-06, -1.3995e-04,  1.6945e-03,  2.4240e-03,
         1.9187e-03, -7.8723e-04, -7.5974e-04, -9.8775e-05, -1.2398e-03,
        -1.9797e-03, -5.3255e-03, -3.6676e-03, -5.7029e-04,  1.2028e-04,
         3.0132e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3690e-03,  1.3480e-01, -5.0421e-04, -3.8147e-04, -1.8805e-04,
         1.6029e-04,  6.2709e-05,  1.9296e-04, -5.2827e-05, -8.0560e-05,
        -3.1191e-05,  1.5650e-04, -2.6377e-05, -1.5964e-04,  1.1349e-05,
        -3.8526e-04,  1.4224e-04,  5.2178e-05,  3.8336e-05, -1.6550e-04,
         1.4183e-04, -4.8562e-04, -1.4494e-03,  1.3831e-05,  5.0626e-04,
        -1.3230e-04,  3.5236e-04,  1.6071e-04,  2.1466e-04,  1.2063e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3820e-03, -4.1910e+00, -1.4024e-02,  2.7931e-02, -3.6459e-03,
         3.9005e-02,  2.5374e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3123e-01,  1.1041e+02, -1.6707e-01,  3.4742e-01,  6.4095e-02,
        -1.2359e-01,  1.9434e-01,  1.4738e-01,  4.2696e-01,  1.2389e-01,
         2.2119e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0035,  1.2661,  0.0391, -0.0065,  0.0084,  0.0048,  0.0015,  0.0019,
         0.0046,  0.0032, -0.0031, -0.0027, -0.0154, -0.0015,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1449e-02,  7.0978e+00, -7.7658e-04,  3.2692e-02,  1.0439e-02,
         4.9089e-03, -5.8387e-03, -1.6172e-02,  1.2581e-02, -1.9494e-02,
        -5.0839e-03,  1.6571e-03, -4.2434e-03, -6.3811e-02,  1.1360e-02,
         1.6791e-02,  1.3842e-02, -2.5539e-03, -2.1218e-03,  6.6285e-03,
        -1.7650e-02,  1.3656e-04, -1.7747e-03, -7.3902e-03,  9.9810e-03,
         1.1881e-02, -7.3325e-03,  2.8104e-03, -4.6733e-04,  7.2469e-03,
         1.8989e-03,  1.3749e-02, -1.5745e-03,  1.9266e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0609e-02,  1.2389e+00,  1.6330e-02,  3.1444e-02,  3.4634e-03,
         1.6159e-03, -1.7174e-03, -1.0892e-03,  6.1004e-04,  6.6097e-04,
        -2.5255e-03,  4.5442e-04,  5.9411e-04,  6.6143e-04,  1.7639e-02,
        -1.0553e-03, -6.1450e-03, -1.0514e-03, -2.8667e-03, -3.1961e-03,
        -3.6411e-03, -3.8463e-05, -8.1819e-04, -3.2767e-04, -7.3904e-04,
         4.2453e-04, -4.8247e-05,  3.6323e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2869e-03,  3.2539e-01,  8.9108e-04,  1.3499e-03, -8.6486e-04,
        -6.7278e-04,  1.5003e-05,  1.0231e-03,  1.3590e-03,  2.9324e-04,
         1.6044e-04, -1.7356e-03,  5.4097e-04,  2.5042e-03,  2.1153e-03,
         4.8102e-04,  2.6597e-03,  5.0911e-04,  2.4163e-04,  7.6538e-04,
         2.8162e-04,  1.0703e-03,  2.0667e-03,  4.4100e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 4.0484e-01, -9.8930e+01,  8.0010e-01, -6.4375e-01, -4.8741e-01,
         4.3504e-02, -3.9632e-01,  1.5658e-01, -4.7863e-01,  9.2104e-02,
        -8.5634e-03,  3.2109e-01, -9.6269e-02, -4.3201e-02,  3.3702e-02,
        -1.3766e-01, -1.4832e-01, -2.1917e-01, -4.2393e-02, -4.2721e-01,
        -1.0553e-01,  1.0307e-02, -1.1258e-01,  9.3231e-02,  2.8156e-02,
        -6.0873e-02,  1.7010e-01,  1.4155e-01,  2.4335e-02,  1.4302e-01,
        -3.7394e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7749e-03,  2.2946e+00,  6.3319e-03,  1.9856e-02, -2.6556e-03,
         1.3634e-02, -2.0798e-03, -4.2533e-03, -6.2586e-03,  2.3620e-03,
        -1.1914e-02,  3.3676e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0651e-02,  7.5232e+00,  5.0031e-03,  2.3902e-02,  1.7545e-02,
         3.5146e-02,  2.4620e-02,  1.4428e-02, -6.5550e-03,  4.5245e-02,
         7.9329e-02, -1.1809e-02, -2.6195e-02,  7.5139e-03,  2.4410e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6651e+00, -3.8829e+02, -2.2341e+00,  1.1145e-01,  4.4885e-01,
         8.9302e-01,  4.3820e-01,  9.6951e-01,  1.7064e-01, -4.3724e-01,
        -1.8115e-01,  5.1385e-01,  2.6391e-01, -1.6178e-01, -8.0156e-01,
         4.3400e-01,  7.4952e-01, -5.1488e-01, -1.2854e-01,  6.5767e-01,
         1.8882e-01, -3.2670e-01,  1.6349e+00,  2.0605e-01,  3.2895e-01,
         2.1568e-01,  1.5894e-01,  2.4583e-01, -2.3523e-02,  2.0200e-01,
        -2.5279e-01,  3.2808e-01, -6.4305e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5925e-03,  1.0190e+00,  9.6027e-04, -5.6570e-03,  1.2948e-03,
         2.6450e-03,  1.5380e-04, -2.7518e-03,  9.1774e-04, -4.0019e-04,
        -1.9861e-03, -3.1107e-04, -3.6652e-05, -4.5534e-03,  2.7344e-03,
        -3.6408e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6206e-03,  3.3737e+00, -5.6444e-02,  1.6270e-02,  2.5865e-02,
         2.0053e-03, -3.8450e-02, -1.0580e-02, -4.0977e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([0.0531, 5.1181, 0.0137, 0.0528, 0.1161, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4360e-02, -1.3637e+00,  1.3206e-02, -8.0771e-03, -2.1394e-03,
        -2.6916e-03, -4.1599e-03,  1.1842e-03,  6.9107e-03,  1.0980e-03,
         4.0761e-04,  2.9190e-03, -2.7131e-05, -3.5976e-04, -6.5553e-04,
         2.4736e-04,  1.8516e-04,  9.5637e-04, -4.6939e-04, -2.6192e-03,
        -4.2561e-03, -9.0705e-04,  1.0895e-03,  3.9210e-04, -3.2513e-04,
        -5.7864e-04,  3.3678e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -0.3496, -13.0711,  -0.0538,  -0.0985,  -0.0238,  -0.0672,  -0.0380,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6552e+00,  2.0969e+02, -1.5117e-01,  1.3233e-01,  2.9247e-01,
         6.2247e-01,  4.0675e-01,  4.6443e-01, -1.1584e-01, -1.7834e-01,
         3.1884e-02, -6.4785e-01, -5.8976e-01,  1.4394e-02, -1.1491e-01,
         3.6923e-01,  3.2649e-01, -9.7255e-01, -1.2663e-02, -3.2178e-01,
        -6.3264e-01, -2.5593e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6449e-02,  8.5587e+00,  1.2558e-01,  3.2602e-02,  2.7625e-02,
         1.0433e-02, -1.0308e-04, -6.2193e-03, -1.4106e-03,  1.6069e-03,
        -2.3810e-03,  1.8264e-02,  9.0626e-03, -2.5553e-02,  1.0914e-03,
        -2.5949e-03,  7.2454e-03,  3.5156e-03,  3.5763e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2459e-02,  3.7551e+00,  3.0756e-02, -1.3027e-02,  2.1264e-03,
         1.6333e-02,  1.3073e-02, -1.3316e-02,  2.5992e-03,  9.9916e-03,
        -6.7783e-03,  3.6670e-03, -2.0033e-03,  3.8389e-03,  2.0890e-03,
         7.4027e-04, -2.4380e-03, -6.0011e-03, -2.2674e-03,  1.7778e-03,
        -7.8385e-04,  3.2719e-04,  9.1793e-03, -2.9623e-03,  6.5895e-03,
        -8.3306e-04, -1.5179e-03,  1.5574e-03,  4.4394e-03,  8.5429e-03,
         1.0662e-02,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-1.4878e+00,  1.3057e+02,  7.1577e-03,  1.3430e-01, -2.9147e-01,
         6.4665e-02,  1.2079e-01,  5.0578e-01,  1.9903e-01, -7.6071e-02,
         8.5807e-02,  1.1100e-01, -4.9611e-03,  4.9416e-01, -1.7227e-01,
         3.0920e-01,  2.9364e-02,  2.2377e-01, -2.2678e-01, -2.1015e-01,
        -2.7289e-02, -2.0272e-02,  2.7343e-01, -1.5003e-01, -1.3188e-01,
        -2.5673e-02, -2.3532e-01, -5.3698e-01,  9.4859e-02, -4.3848e-01,
         5.3149e-02, -3.3540e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6564e-01, -2.3779e+01, -5.6066e-04,  8.4672e-02, -1.6692e-01,
        -9.4161e-02, -5.5691e-02, -6.8332e-02, -8.7430e-02, -2.4691e-02,
        -7.7104e-02, -1.9907e-02, -7.8986e-03,  1.3472e-02, -7.6170e-02,
        -2.9560e-02, -4.0374e-02, -1.2345e-02,  1.3818e-02, -1.1329e-02,
         1.7292e-02, -2.0077e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0878e-01,  2.9746e+02, -2.8050e-01, -1.0393e+00, -3.1375e-01,
         5.6011e-02,  1.5260e-01,  1.2594e+00,  2.5715e-02,  2.3449e-01,
        -1.0519e+00, -1.0545e+00, -2.8083e-01,  2.8475e-01, -3.5968e-01,
        -1.5367e-01, -2.5735e-01, -1.6018e-01, -6.8875e-01, -2.4300e+00,
         1.2352e-01, -5.1896e-01,  9.6477e-02,  6.9006e-01,  5.0587e-01,
         7.5211e-01,  1.6394e-01, -5.1282e-01, -2.0953e-01,  1.3671e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8191e-01,  1.2841e+02, -9.8629e-01,  3.4487e-01,  3.4993e-01,
        -3.6405e-02, -3.5774e-02, -9.4897e-02, -1.3258e-02,  1.2809e-04,
         3.8455e-01,  2.2455e-01,  4.1077e-01,  5.6739e-02, -5.3112e-01,
        -7.4050e-02,  5.3236e-02, -4.3366e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5366e-01,  1.8684e+01, -2.9123e-03,  4.8639e-04,  3.4423e-02,
        -9.3026e-02, -3.3830e-02, -7.8300e-03, -9.0743e-02, -1.9991e-02,
        -3.7651e-03, -2.2795e-02, -3.0610e-03,  1.5979e-02, -1.1718e-02,
        -1.2660e-02,  1.5258e-02, -1.2049e-02, -4.2942e-02, -1.0821e-02,
         4.5506e-03, -5.2528e-02,  5.0671e-02,  8.2969e-03, -4.0921e-02,
        -5.7155e-03, -1.9906e-02,  4.4997e-03, -1.5129e-02, -1.2982e-02,
        -6.0138e-04, -3.3779e-03,  3.2860e-02, -4.6718e-02, -1.1487e-02,
        -1.5125e-02,  6.8865e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9778e-02,  9.3050e+00,  4.0596e-02,  1.0651e-01,  1.9844e-02,
        -1.4189e-02,  6.4660e-03, -7.6861e-02, -1.0495e-02,  2.7321e-03,
         1.4933e-02, -3.6588e-02, -2.8688e-02,  2.2759e-03,  2.2754e-02,
        -1.3709e-03, -2.7613e-02, -3.6604e-03,  1.5463e-03, -9.9749e-03,
         1.2343e-02,  1.2400e-02,  3.0708e-02, -2.0934e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9336e-01,  9.2528e+01, -4.2142e-01, -2.9266e-03, -9.9173e-02,
        -5.5185e-02, -2.2300e-01,  2.2691e-01, -7.6177e-02,  4.3490e-02,
        -3.7045e-02,  2.3874e-02,  1.3009e-02, -7.8893e-02, -1.5770e-01,
         7.5286e-02,  1.9546e-02, -5.3421e-02, -1.9829e-02, -5.2593e-02,
         8.1248e-02, -2.0175e-02, -6.4736e-02, -2.0259e-02, -8.2805e-02,
        -9.9479e-02, -1.1744e-01,  3.8879e-03, -1.0780e-01, -4.3627e-02,
        -1.2289e-01, -7.4033e-03,  2.4572e-02,  2.4908e-02, -4.1119e-02,
        -7.1533e-02, -4.2680e-02, -9.5427e-02, -2.6953e-02, -1.2341e-01,
         1.2947e-01, -9.2395e-03,  6.1937e-02, -1.3830e-02,  1.1143e-01,
         9.9876e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9863e-03,  2.1520e+00, -2.4208e-02, -9.1350e-03, -2.0040e-02,
        -3.1483e-03, -1.3189e-03,  5.8973e-04, -8.0450e-04, -4.2051e-03,
         4.7225e-03,  1.1128e-03,  1.5179e-03, -1.6728e-03, -4.7888e-03,
         2.5398e-03, -1.1964e-02, -4.7726e-03,  5.5706e-04, -7.9445e-04,
         2.2752e-04,  5.4809e-03,  2.3412e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8921e-01,  6.4393e+01, -6.9987e-01, -1.1765e-01, -6.2164e-01,
         1.4952e-01,  3.5780e-01,  3.9416e-02,  3.1886e-01, -1.1343e-01,
        -1.5180e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1267e+00, -1.8271e+02, -6.6827e-01, -7.1390e-01, -6.0000e-01,
         4.1351e-01, -6.9829e-03, -1.8255e+00, -5.1969e-02,  4.4276e-02,
        -4.7724e-01, -3.2242e-01, -3.2981e-01,  1.2845e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3817e+00, -1.0755e+02,  2.2903e-01,  4.7090e-02,  5.6061e-02,
        -9.7357e-02,  1.1778e+00,  1.3410e-01,  3.1014e-01, -4.9682e-01,
        -9.7321e-01,  1.4392e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6926e-01,  3.4817e+01,  1.8318e-01, -4.1620e-02, -6.8352e-02,
         4.6783e-02, -1.3053e-02, -5.6474e-02,  2.4320e-02,  4.1315e-02,
        -8.4659e-02,  9.5837e-02,  3.2645e-02,  5.4448e-02, -9.8551e-03,
        -3.0710e-02,  4.9572e-03, -1.2868e-02,  6.6490e-02, -1.8080e-02,
         9.0037e-03,  1.2817e-02,  1.0331e-01, -4.0545e-02,  6.3245e-02,
         1.0675e-01,  4.3944e-01, -6.2921e-02, -4.5270e-02, -4.4516e-03,
        -2.4369e-02,  9.2988e-03, -2.1595e-02, -1.7922e-02, -1.2097e-02,
        -1.0528e-02, -1.8620e-02, -2.8722e-03, -3.1560e-02,  2.7435e-02,
         3.8402e-03,  3.9591e-02, -6.9468e-02, -6.6932e-03,  9.2916e-03,
        -1.3609e-02,  5.6930e-02, -9.3578e-04, -5.0368e-04, -2.5106e-02,
         2.6112e-02,  1.0523e-02, -3.3625e-03, -2.0836e-02,  5.7112e-02],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-6.0790e-02, -6.1233e+00, -5.8240e-02, -9.6259e-03,  1.9039e-02,
         4.3746e-03,  2.1520e-03,  3.4132e-03,  6.6123e-03,  7.0188e-03,
         7.1738e-02,  2.8408e-03, -4.2709e-03,  2.2314e-02,  1.3345e-02,
         1.0793e-02,  1.9487e-02, -5.9030e-03, -1.2537e-03,  1.3481e-02,
         1.2566e-02,  1.7198e-03, -5.0440e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5209e-01,  1.8245e+02,  5.9791e-01, -4.6283e-02,  2.5252e-01,
         2.2818e-01, -1.0271e+00,  2.3881e-01, -9.4630e-01,  6.9484e-01,
        -1.5426e-01, -3.5025e-02, -2.8992e-01,  2.7267e-01,  2.2398e-02,
        -3.6753e-01, -2.7822e-01,  1.1856e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6406e-01,  2.0022e+01,  1.9966e-02, -1.3182e-01,  6.4016e-02,
         3.8268e-02, -2.6324e-01,  1.6195e-02, -8.3785e-02,  3.0450e-02,
        -5.0357e-02,  1.0352e-03,  1.8563e-02,  2.2031e-02, -8.1835e-02,
        -1.2442e-01, -6.4574e-02, -1.0375e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0502e-01,  3.0628e+01, -1.7991e-02,  3.6380e-02, -1.1475e-01,
         4.4032e-03,  7.7312e-02, -3.0719e-02,  6.2740e-02, -3.1028e-02,
        -9.1480e-02,  9.2880e-02,  1.2599e-01,  2.4315e-01,  1.5313e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7538e-02,  2.8640e+00, -2.8528e-04,  7.9439e-03,  5.0820e-03,
        -4.5382e-03,  1.4281e-03,  2.5072e-03,  4.7072e-03, -3.1115e-03,
         7.2975e-03, -7.0110e-03,  3.8706e-03,  2.7511e-03, -6.1877e-03,
         6.9226e-04,  3.1324e-04, -5.2262e-03, -2.2337e-03,  5.8643e-03,
         7.2857e-03,  1.6937e-03, -1.7714e-03,  1.5836e-04, -8.1352e-03,
         2.4910e-03,  1.0020e-02,  1.0785e-04,  2.7404e-03,  8.6653e-03,
        -7.0562e-03,  5.9291e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8533e-01,  8.5775e+01, -1.4175e-01, -4.1242e-01, -1.2717e-01,
        -1.0406e-01, -1.6338e-01, -2.2443e-02, -3.7326e-01, -6.9934e-02,
         5.3105e-02, -3.3322e-02, -2.2789e-01, -4.5156e-02, -2.8124e-02,
        -4.0572e-02, -8.8419e-02, -1.5740e-01,  4.4327e-02,  2.2181e-02,
        -4.2917e-02,  2.8208e-02, -2.1550e-01, -1.6362e-01,  3.6807e-02,
         2.0831e-02,  1.3777e-02, -2.8814e-01, -5.0890e-02, -3.4471e-02,
         1.0717e-01, -3.7151e-02, -5.8859e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5161e-01,  2.5269e+02,  1.1001e+00,  7.6941e-01,  3.0812e-01,
        -3.3303e-01,  8.7524e-02, -3.4838e-01,  4.7116e-01,  9.4380e-02,
         1.1411e-01, -8.0827e-02, -2.6980e-03, -3.1034e-01, -6.1685e-02,
         9.2865e-02,  2.1452e-01,  1.8695e-01,  4.7591e-02, -1.6556e-01,
        -4.1043e-02, -2.2190e-01, -1.1030e-01, -3.2429e-01, -1.4665e-01,
        -1.8311e-02,  2.0474e-02,  2.7004e-01,  1.8757e-01,  3.3685e-01,
         2.3211e-01,  5.2467e-02,  8.3855e-02, -1.3829e-01,  7.8129e-02,
        -2.5461e-01, -4.0982e-01,  1.7856e-01,  1.7435e-01, -1.9603e-01,
         1.9277e-02, -1.7013e-01,  1.7969e-01, -2.3933e-02,  3.3716e-01,
         1.0406e-01, -1.2010e-01,  6.0214e-03,  6.7996e-02,  8.3262e-02,
        -1.7225e-01,  2.0580e-02,  3.8363e-02,  1.0821e-01,  4.3163e-01,
         3.7930e-01, -1.5231e-02, -1.4559e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7889e-01, -1.5592e+01,  2.3181e-01,  1.2803e-01,  4.6953e-02,
         5.2772e-02,  1.5515e-02,  3.3162e-02,  7.5641e-02,  7.3236e-02,
         4.9828e-02,  6.1279e-02,  1.1360e-02, -7.7918e-03, -1.4125e-01,
         2.6843e-02,  4.2692e-02,  2.8502e-02,  2.5552e-02, -4.0557e-02,
        -3.3901e-03,  1.2902e-03,  7.9636e-02,  8.6262e-02,  1.4381e-02,
         4.4354e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9934e-02,  1.1917e+01, -5.4844e-02,  4.1281e-02, -4.6995e-02,
         2.2487e-02,  8.0312e-03, -1.4066e-02,  7.2411e-03,  9.4792e-04,
        -5.8885e-03,  1.9608e-02, -5.7212e-02,  2.8909e-02, -6.8430e-03,
        -2.6522e-02, -1.4511e-02,  1.0953e-02,  1.9194e-02,  4.2516e-02,
        -2.1308e-02, -1.2560e-02,  1.2085e-02,  2.2032e-02,  1.5962e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7808e-01,  5.9995e+01,  4.0189e-01, -2.2842e-01, -1.7903e-01,
        -4.1976e-02, -7.4900e-02, -7.2181e-02,  1.4648e-03,  2.7919e-02,
        -7.0601e-03, -1.2051e-02, -5.7950e-02,  2.4678e-03, -1.3278e-01,
        -5.2516e-02, -7.3149e-02, -3.1058e-02, -2.7176e-02,  3.5930e-02,
        -2.6131e-02, -7.1115e-03,  6.5757e-02,  3.6059e-02,  7.5632e-02,
        -1.0780e-01,  2.4174e-01,  2.0426e-02, -2.8137e-02, -8.1328e-03,
        -1.6011e-01, -7.4606e-03, -1.0968e-01, -7.9264e-02, -4.3163e-02,
        -1.2818e-01, -4.5160e-02,  2.7727e-02, -8.0381e-02, -1.2734e-01,
         7.5352e-02, -2.5225e-02,  3.2971e-02,  6.9384e-02,  1.6104e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3554e-02,  5.3541e+00, -1.6009e-02,  1.2546e-02,  1.8050e-02,
         1.6965e-02,  1.8947e-02,  2.2512e-02, -1.6432e-02, -2.1298e-03,
         9.4900e-03,  1.5570e-02,  2.7870e-03,  2.0662e-02, -8.3947e-03,
         1.9999e-03,  5.6800e-03,  6.4386e-03,  3.3038e-03,  7.3790e-04,
         1.8937e-02,  5.0151e-03,  4.5175e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8247e-01,  1.3256e+02, -7.7928e-01, -6.4987e-01, -3.8486e-01,
         1.4429e-01, -1.2091e+00, -1.2433e+00, -1.0499e-01,  3.1426e-01,
        -3.5983e-01,  1.2293e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.9962e-02,  3.7253e+00,  1.1994e-02, -2.1225e-03, -4.5490e-03,
         1.5262e-02,  2.6018e-02,  4.2156e-03,  2.7394e-03, -1.7465e-04,
         3.3459e-03,  5.7665e-03, -3.9777e-02,  6.8782e-03,  5.6555e-03,
         1.5723e-02,  8.1887e-03, -4.5712e-03,  4.2735e-03,  2.2329e-03,
         1.9476e-03,  7.8450e-03,  8.7372e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0467e-01,  1.3559e+02,  4.3334e-01,  2.1142e-01, -3.2298e-01,
         2.8713e-01, -4.2560e-01, -5.5420e-02, -7.2428e-02,  8.7474e-02,
         1.8384e-01, -1.0977e-01,  2.7841e-01,  3.7725e-02,  3.9022e-01,
         8.8698e-02, -7.8798e-02,  2.2580e-02, -9.9769e-02,  8.4076e-02,
         3.1543e-01, -9.5953e-02, -6.6570e-02, -6.1829e-02, -3.4481e-02,
         1.0773e-02, -1.9629e-01,  4.2021e-02, -2.4268e-01, -1.0931e-01,
         6.8317e-02,  1.6234e-01,  5.8917e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5355e-03,  1.9011e+00, -1.5638e-02, -1.0954e-03,  8.4269e-04,
        -2.0224e-03, -5.6604e-04,  1.5414e-03,  6.2788e-04, -2.3820e-03,
        -1.0324e-03,  2.7042e-05,  6.7752e-04,  2.6963e-04, -6.8327e-03,
        -1.2393e-03, -2.1147e-03, -1.3987e-03,  4.9447e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9989e-02,  1.3591e+01, -6.9973e-02, -9.0893e-02, -1.5165e-02,
         1.0914e-01,  4.9360e-03, -2.8188e-02,  1.7527e-02,  1.4145e-02,
        -1.1711e-01, -1.2504e-02,  1.0134e-01,  6.7284e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4268e-02,  1.2898e+01,  1.6059e-02, -3.8720e-02, -8.9351e-03,
         1.8204e-02,  2.6910e-02, -3.1771e-03, -1.9482e-02,  5.5098e-04,
         6.4162e-03,  7.8105e-04,  9.6601e-04,  3.9366e-02, -5.2995e-03,
        -4.2892e-03,  5.4880e-04,  1.9920e-03, -2.4719e-05, -1.6433e-03,
         7.5779e-03,  4.9974e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4924e-02,  1.4282e+01,  5.3840e-02, -3.7049e-02,  1.0147e-02,
        -6.2758e-03,  5.7416e-03,  1.1063e-02, -1.0561e-02,  4.5049e-02,
        -3.5915e-02,  2.8425e-02,  1.4117e-02, -7.0075e-03, -1.3022e-02,
        -1.6991e-02, -6.3582e-03,  1.6431e-02,  2.0380e-02,  9.0861e-03,
        -6.3651e-03,  2.7645e-02,  6.5663e-03, -2.9795e-03, -1.9391e-02,
        -9.6771e-03,  4.8817e-04, -4.6932e-03, -9.3221e-03,  4.2129e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4110e-01,  3.3841e+01, -1.0375e-01, -6.0123e-02,  5.9597e-02,
         9.3326e-02, -1.2651e-01, -1.6040e-01,  8.4971e-03,  2.6304e-02,
         1.3531e-01, -1.5129e-02,  4.4022e-02,  1.1870e-01,  5.0004e-02,
         4.0158e-02,  7.0301e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8718e-02,  8.7199e+00, -1.2364e-02,  5.9722e-03,  1.4439e-02,
         8.2946e-03,  9.5196e-03, -6.2221e-02,  7.0034e-02, -6.2620e-04,
         1.9686e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3888e-01, -2.2914e+01, -1.3316e-01,  1.1547e-01,  7.9085e-02,
        -4.4710e-02,  1.4932e-01,  1.8301e-02,  2.1049e-02, -9.3235e-03,
         6.1568e-04, -5.6634e-03,  8.7498e-02,  8.1331e-03,  2.8751e-02,
         1.1512e-01,  6.5493e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3545e-02,  5.4182e+00, -1.2382e-02,  9.2752e-03, -2.6247e-03,
        -2.3013e-03, -3.0825e-03, -1.1164e-02,  8.2341e-03, -9.0339e-03,
         4.3163e-03,  1.1626e-02,  4.2840e-03,  2.2122e-02,  9.3103e-03,
         7.4837e-03,  4.7431e-03,  5.1620e-03, -2.6653e-03,  1.0576e-02,
         6.1852e-04, -9.0309e-04,  1.3647e-02,  2.3175e-03, -1.1128e-03,
        -1.5643e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5309e-01,  1.3329e+01, -7.2624e-03,  1.2061e-02,  2.6636e-02,
        -6.7456e-03, -3.2165e-05,  1.1130e-02,  4.7197e-03,  6.4383e-03,
        -3.8476e-03, -4.1822e-02, -1.3891e-02,  3.5321e-02,  1.1256e-02,
         5.1461e-03, -2.2288e-02, -1.0611e-02,  8.1052e-03,  1.9756e-02,
        -4.0966e-02, -1.3910e-02, -1.2846e-02, -3.4108e-02, -6.5680e-02,
        -1.8491e-02,  3.2440e-03, -1.8227e-02, -6.6658e-03,  5.6710e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5473e-02,  2.3057e+00, -2.7089e-03, -6.1605e-03,  9.9988e-03,
        -4.5936e-03,  4.5707e-03, -9.4859e-04,  4.0752e-03,  3.1929e-03,
         1.5142e-03, -4.8840e-04, -2.7297e-03, -4.2270e-03, -4.8422e-03,
        -1.8641e-03, -2.2482e-03, -4.5143e-03,  5.8271e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 3.1536e-04,  2.5240e-01,  1.6908e-03,  2.6289e-03,  1.2550e-04,
         8.5088e-05,  2.0557e-03, -2.2305e-04, -7.8706e-04, -4.1881e-04,
        -2.4785e-05,  1.5405e-04, -3.1530e-05, -3.0923e-04, -3.4921e-04,
        -1.3150e-03,  2.3562e-04,  7.2576e-04, -2.8630e-04,  9.8606e-04,
         4.0976e-04, -5.2901e-05, -5.3260e-05, -2.9281e-04,  1.2638e-04,
        -1.7689e-03, -7.6338e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0355e-03,  6.7274e+00, -6.1501e-02,  8.4371e-02,  1.8589e-02,
        -3.5610e-02, -2.5625e-02,  8.2813e-03,  7.5450e-03,  1.6268e-02,
         2.1078e-02,  6.0764e-02,  2.3153e-02, -7.7357e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9836e-03,  2.2497e+00,  6.5864e-03, -6.8574e-03, -5.1408e-03,
        -1.9712e-02,  1.1309e-03,  1.2596e-03,  1.2911e-03, -2.8707e-04,
         1.8733e-03,  1.5059e-03, -1.3755e-03, -3.9107e-03, -4.2530e-03,
        -3.8854e-03, -6.2941e-04, -9.2685e-03, -5.8520e-03,  3.9401e-03,
         1.3715e-03, -4.5612e-03, -4.5679e-03, -3.9747e-03, -2.8600e-03,
        -6.8326e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0408e-01,  1.8771e+01,  5.9142e-03, -8.5952e-02, -4.4162e-02,
        -1.7881e-01, -3.5850e-02, -4.0155e-02, -6.1105e-02, -6.9505e-02,
         1.8683e-02, -1.5830e-02, -2.7470e-02,  2.6252e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1609e-02,  1.8252e+01, -2.5069e-02,  4.1185e-02,  3.4870e-02,
        -6.5979e-03,  9.7202e-03,  6.9239e-02,  1.8965e-02,  6.5701e-03,
         2.3865e-02, -1.5067e-02,  3.4194e-02, -5.0375e-02,  4.8551e-02,
         9.8720e-04, -4.8362e-03, -2.0977e-02,  4.2264e-02, -3.1487e-02,
        -1.5196e-02,  3.2532e-02, -9.2694e-03,  4.5616e-02,  4.0760e-02,
        -1.0924e-02, -6.2990e-02, -2.2772e-03, -2.1512e-02, -1.0923e-02,
        -1.4954e-02,  3.0970e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6351e-04,  5.7299e-01,  3.9488e-03, -9.6205e-04,  4.6880e-04,
         4.5376e-04, -3.4708e-03, -4.3126e-04,  4.7931e-04,  1.9109e-04,
         2.1800e-03, -3.6088e-04,  9.7661e-04, -2.8264e-04, -4.0239e-04,
         4.9353e-04, -4.0569e-04,  1.3176e-03, -1.3641e-03,  1.1159e-03,
        -5.6361e-04,  1.0222e-03,  1.0621e-04,  3.3120e-04, -1.4817e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([  0.3158, 119.1816,   3.5769,  -0.4241,  -0.1357,   1.4770,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1727e-04,  9.1741e-02, -4.7495e-04, -2.6858e-04,  2.2872e-04,
        -2.6440e-04,  2.7367e-04, -1.2850e-04, -5.2739e-05,  3.5424e-05,
         8.5862e-05,  3.0139e-05,  7.8368e-05,  1.2376e-04,  1.5198e-04,
         3.4446e-05,  6.3898e-05,  5.2333e-05, -9.1532e-05,  1.8405e-05,
        -1.8258e-04,  3.7659e-05,  1.9786e-04,  1.4300e-04, -6.5726e-05,
         2.8976e-04, -1.4255e-04,  1.8429e-04, -4.0679e-05,  5.3984e-05,
         1.4373e-04, -1.3728e-04,  4.7831e-05, -8.7491e-06,  6.4118e-05,
         1.2162e-05, -9.0715e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3123e+00, -3.2184e+02,  2.0498e+00, -2.3524e+00,  3.0382e-01,
        -2.4105e-01,  5.6311e-01, -2.6592e-01,  6.8808e-01,  2.5388e-02,
         4.1071e-01,  2.8161e-02, -1.9543e-01,  3.6804e-01,  4.6601e-01,
         2.1614e-01,  2.8348e-01, -8.1003e-01,  2.5887e-01, -6.9875e-01,
         2.6287e+00,  2.4930e-01,  4.5094e-01,  3.9398e-01,  1.8469e-01,
         1.3544e+00,  2.7177e-01,  2.3796e-01,  6.4753e-01, -5.0718e-03,
         3.7532e-01, -6.8491e-02, -1.5582e-01,  1.2838e-01,  5.5129e-02,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8009e+00,  6.6792e+01,  1.5691e+00,  1.1243e-01,  1.1133e-01,
         4.9114e-02,  3.1661e-02, -2.2899e-01,  6.1556e-02,  4.0216e-01,
        -1.7941e-01, -5.2728e-01,  5.3674e-01,  4.0184e-01,  5.7453e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6132e-02,  1.4325e+01,  1.4851e-01,  9.6762e-03,  3.2657e-02,
        -2.4741e-02, -2.8492e-03, -3.9793e-03, -2.1399e-02, -3.4315e-03,
        -6.9163e-03,  2.4175e-03, -2.7446e-03, -1.5447e-02,  3.7790e-03,
         1.1490e-02, -5.8811e-03, -1.3135e-02,  2.7546e-02, -4.3293e-03,
        -4.3372e-04,  1.7277e-03,  3.8911e-03,  4.4140e-04, -2.9254e-02,
         1.6043e-03,  1.6698e-02,  3.6507e-03,  3.5889e-03,  8.0904e-03,
        -8.0892e-04, -1.0554e-02,  5.4345e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1254e-01,  1.6662e+02,  9.5565e-01,  1.0035e-02, -2.0362e-01,
        -7.0695e-02, -2.4612e-01, -1.4755e-01, -3.5826e-01, -7.9708e-02,
        -9.3351e-01, -1.0045e+00, -1.7948e-01, -9.6208e-01, -5.4603e-01,
        -9.1234e-02, -2.5837e-01,  3.2079e-02,  2.2325e-01,  2.4096e-01,
         1.5589e-01, -2.2263e-01, -9.1798e-02, -3.0748e-02,  1.9145e-01,
         4.1057e-01, -1.0006e+00,  4.4221e-01, -1.1871e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1821e-01,  2.2373e+01,  4.2132e-04,  9.1553e-02,  4.9854e-03,
         1.4437e-02,  2.8688e-02,  8.0817e-02, -3.4053e-02,  7.3949e-03,
         1.9763e-02,  4.4887e-02,  4.2847e-02, -5.5291e-02, -1.6745e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4701e-02,  2.9133e+01, -4.2263e-02,  1.7822e-02, -1.0179e-02,
        -7.9875e-02,  1.1231e-02, -7.3072e-02,  2.2412e-02, -1.7901e-02,
        -5.1341e-02, -3.3470e-02,  4.5415e-04,  2.9430e-02, -4.7023e-03,
        -7.7608e-02, -1.6356e-02,  3.3465e-02, -1.3179e-02, -1.1217e-02,
         1.3617e-02, -4.3196e-02, -2.7375e-02,  1.3910e-02,  1.0330e-02,
         1.2630e-01, -2.1412e-02, -1.1933e-02, -3.9703e-02,  1.5320e-02,
         1.5953e-02, -2.7259e-02,  6.8527e-03,  1.6449e-02,  2.1111e-02,
         3.1293e-02,  8.7725e-03,  2.8736e-02, -1.1785e-02,  6.7793e-03,
         6.4032e-03, -8.0215e-04, -5.8127e-02, -1.1242e-02, -1.5731e-02,
         1.4590e-02, -1.3741e-02, -1.6732e-02, -8.4051e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0093e-01, -1.5763e+01,  2.4039e-03,  5.8666e-02,  4.4851e-02,
         2.0400e-02,  6.4159e-02,  3.9279e-02,  5.2267e-02, -2.9868e-02,
         1.6985e-02, -3.5389e-03,  3.3378e-02,  1.5558e-02,  1.3131e-02,
         1.9286e-02, -5.5010e-02,  2.7895e-03,  5.5272e-03,  4.1330e-02,
         1.7287e-02,  4.9692e-03, -4.9219e-03,  5.0940e-03,  1.6167e-02,
         3.3332e-02,  4.6294e-02, -1.5129e-02, -3.0574e-02,  3.8294e-02,
        -4.9670e-03, -1.9338e-02,  5.6980e-03,  2.3646e-02, -4.5313e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9444e-02,  3.2162e+00, -4.4204e-03,  2.9791e-03, -4.4364e-03,
         3.7614e-03, -8.3842e-04,  4.2592e-03,  4.8186e-05, -1.1196e-02,
        -1.3647e-02, -5.4593e-03,  3.0701e-03, -6.2576e-03,  7.1834e-03,
        -3.9206e-03,  3.7665e-03, -2.8073e-03,  3.4654e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7450e-03,  4.8168e-01,  1.1006e-03, -1.8238e-04, -5.7502e-04,
         3.1271e-04, -1.5024e-03, -2.8210e-04,  3.8154e-04, -1.3943e-03,
         3.5539e-04,  1.0280e-03, -2.7148e-04, -3.0371e-03, -7.4571e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2451e-02,  3.3449e+00, -2.0093e-02, -1.5450e-03,  1.8389e-02,
         2.2598e-03,  3.0773e-03,  2.8834e-03,  3.0763e-03,  1.3065e-03,
        -1.1045e-04, -2.7912e-03, -9.1749e-03, -6.6351e-03, -6.8765e-03,
        -5.6981e-03,  3.1830e-04,  1.3460e-03, -2.8383e-03,  8.0709e-03,
        -1.6424e-03,  1.5268e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0804e-02,  8.1017e-01, -3.2143e-03, -2.3844e-03, -1.3827e-03,
        -5.0564e-03, -2.0740e-03,  1.1768e-04, -2.7150e-03, -7.4128e-04,
        -3.2642e-04, -2.1428e-04,  1.1041e-03, -4.1048e-04, -2.2939e-04,
        -1.1281e-03, -1.8074e-03, -2.3867e-03,  1.6771e-04,  6.2340e-04,
        -1.3170e-03,  3.0268e-04, -1.8743e-04, -1.1064e-03, -1.0533e-03,
         9.9176e-05, -2.9622e-04,  3.4896e-04,  1.7213e-04,  2.3056e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8773e-03,  5.7565e-01, -7.3164e-03, -4.3080e-03, -2.4863e-04,
         1.8382e-03,  1.1661e-03, -3.1597e-03, -6.1476e-05, -9.9823e-04,
        -6.8703e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0091e-03,  4.2228e-01, -1.4939e-03, -5.1233e-04, -7.7446e-04,
        -1.1134e-03, -6.0576e-04, -3.7586e-04,  8.9916e-04, -2.7897e-04,
        -1.9181e-04, -7.8363e-05, -3.8153e-05, -4.1634e-04,  2.3772e-05,
         1.3354e-03, -1.3762e-03,  4.3122e-04, -1.5012e-03, -3.9863e-04,
         1.3817e-03, -3.0720e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9590e-01,  7.3143e+01,  5.7372e-01, -5.3827e-02,  5.7470e-02,
        -6.6115e-01, -4.7213e-02,  1.0632e-01, -1.7732e-01, -1.2040e-01,
        -2.1196e-01,  1.1239e-01,  1.6290e-01, -1.3859e-01, -4.3387e-02,
        -1.6817e-01, -4.4108e-02, -2.4428e-01, -3.5345e-01, -4.8226e-02,
        -1.4105e-01, -1.0231e-02, -1.3356e-01,  3.8060e-02,  1.7859e-01,
        -5.2078e-02,  7.9254e-02, -8.6628e-03, -1.0506e-01, -8.0874e-03,
         1.2486e-01, -1.4621e-01,  1.3144e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3715e-01,  7.3839e+01,  1.3015e+00,  1.0536e-01,  2.3177e-01,
        -2.4700e-01,  1.4790e-01, -1.5607e-01,  3.4487e-01, -3.6820e-02,
         1.2768e-01,  7.1162e-02,  2.6469e-01,  1.6322e-01, -6.8809e-02,
         3.2096e-01,  2.6586e-01,  2.0473e-02, -3.5093e-02, -6.2351e-02,
        -1.6857e-01, -2.0792e-01,  8.8706e-02,  1.0593e-01, -3.0412e-01,
         1.2033e-01,  1.7832e-02,  2.0343e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1995e-02,  3.0964e+00, -1.8382e-02, -5.1549e-03, -6.9286e-03,
         5.4503e-04,  6.6439e-03,  1.5409e-03,  1.3362e-04,  8.3141e-03,
        -8.8904e-03,  9.8781e-03,  9.5503e-04,  3.5558e-04,  3.2739e-04,
        -1.6199e-03,  1.3318e-03,  2.1143e-03,  3.3929e-03, -3.6313e-03,
         3.0509e-04,  2.7628e-04,  9.6994e-03,  7.9548e-04,  9.4239e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.2271e-02,  4.7703e+00,  7.0094e-03,  1.1041e-02, -3.1225e-02,
         1.0847e-02, -5.7253e-03, -4.5936e-03,  1.0733e-02,  9.8730e-04,
         1.0133e-02,  1.2075e-02,  7.8883e-03,  4.0841e-03, -5.6507e-03,
        -2.3429e-02, -1.9942e-04,  1.0906e-02,  4.9630e-03, -2.6190e-03,
         8.9338e-03,  3.1212e-03, -4.7800e-03, -4.1791e-04,  9.7757e-04,
        -1.0400e-03,  3.2266e-03, -8.2462e-03, -1.7813e-03, -6.3563e-03,
        -5.7832e-03, -6.2290e-03, -8.0836e-03, -3.7556e-03, -3.0241e-03,
        -6.8617e-03, -1.0100e-02, -8.5181e-03,  5.1932e-03, -1.3104e-02,
        -1.1378e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4613e-02,  1.4954e+00,  1.1143e-02,  1.0702e-03,  5.1708e-03,
         1.6635e-03,  6.0293e-03, -3.9845e-04,  9.8067e-04, -3.0670e-04,
        -1.8329e-03, -5.3152e-03,  2.8038e-03, -8.7219e-04,  1.5086e-02,
        -5.1062e-03,  3.3721e-03,  1.7820e-03,  3.0123e-03,  6.5383e-04,
         1.5391e-03,  3.3677e-03, -4.3756e-04,  6.1885e-05, -5.3422e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5978e-01,  2.5682e+01,  3.5603e-02,  2.3606e-02, -7.1769e-03,
        -5.1262e-02,  9.3751e-02,  3.7274e-03,  4.3711e-03, -2.6308e-03,
        -3.8862e-02,  1.8797e-02,  1.7362e-04, -1.5508e-02, -4.2818e-02,
        -8.2198e-03, -5.1540e-02,  6.6907e-02,  1.0165e-02,  2.0800e-02,
        -6.2254e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.8223e-03,  6.1756e+00, -9.5340e-02, -4.0020e-02, -3.6728e-02,
        -1.1203e-02, -2.1975e-02, -1.8516e-02, -1.2957e-02, -3.6571e-02,
        -2.9377e-02, -2.6213e-02, -1.7242e-03, -1.1040e-02, -1.6165e-02,
         9.6386e-03,  8.1119e-03,  5.6024e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2628e-02,  2.4473e+00, -8.9390e-03,  3.3992e-04,  4.9631e-03,
         4.7728e-04,  1.1761e-02, -5.2094e-03, -4.9956e-03,  1.0518e-03,
         1.2929e-03, -4.3609e-03,  4.2902e-04,  1.6579e-03,  6.3886e-04,
         3.4492e-03,  2.1300e-03, -4.8959e-03,  4.2082e-03, -4.1619e-03,
         1.9134e-03, -1.4097e-03,  8.3350e-04,  2.2968e-03,  3.9081e-04,
         1.5097e-03,  3.6591e-03,  1.0946e-03, -8.0896e-04,  2.0650e-04,
         7.5064e-04,  1.9570e-03,  7.5786e-05, -2.9997e-03,  4.1472e-03,
         4.7112e-03, -5.8361e-03, -1.0912e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8554e-01,  5.4422e+01, -7.8343e-01, -1.4298e-01,  4.0012e-02,
         6.4678e-02, -9.8765e-02, -1.2426e-02,  7.8273e-02,  6.4232e-02,
        -3.7921e-02, -9.9395e-02, -5.1888e-02, -1.2807e-01,  1.1373e-01,
        -3.4059e-02, -2.0917e-02, -3.5873e-02, -1.2598e-01, -8.5628e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6610e-02,  3.9421e+01, -1.3351e-01, -1.0989e-01,  2.7079e-02,
        -1.3969e-01, -6.7372e-02, -1.9924e-01, -1.0326e-03,  1.0925e-01,
         2.5396e-01,  7.0899e-02, -6.8289e-02,  1.1881e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3477e-02,  1.4005e+00,  8.3539e-03,  3.8664e-04, -1.0014e-03,
        -2.2094e-03, -9.2471e-04, -9.8398e-04, -8.5324e-05,  1.6338e-03,
        -9.0034e-05,  7.1648e-03, -6.5079e-05, -1.0211e-03,  5.1082e-03,
         7.5929e-04, -7.2510e-04, -2.2305e-03, -2.2798e-04,  2.6904e-04,
        -3.7142e-04,  4.3668e-03,  1.6259e-03,  3.1387e-03, -1.3648e-03,
        -1.8425e-03,  7.6166e-04,  7.3773e-04,  6.2370e-06, -5.0532e-03,
         1.2040e-05,  1.5451e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4856e+00, -2.3813e+02, -9.0078e-01,  5.4257e-01,  6.9522e-01,
         1.6555e-01,  2.7418e-02,  3.7664e-01,  3.4272e-02,  5.3570e-01,
         7.2662e-02, -7.3408e-01, -1.1929e-01, -3.6624e-01, -2.5282e-02,
        -2.9484e-01, -2.7437e-02,  7.1876e-01,  7.1897e-01,  8.2996e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2305e-02,  1.8912e+01,  1.9766e-01,  1.0443e-02, -2.7819e-02,
        -8.9840e-03, -9.3515e-03, -1.2871e-02, -3.3712e-02, -3.1330e-02,
        -3.6993e-02, -2.0626e-02, -4.3519e-03, -4.2734e-03, -1.6665e-03,
         3.1545e-04, -5.5322e-03, -1.1152e-01,  2.6754e-03, -8.4159e-03,
        -6.8349e-03, -1.2021e-02, -1.3243e-02,  2.3239e-02, -4.3620e-03,
        -4.3885e-02, -1.4624e-01, -1.3406e-02, -1.0722e-02,  3.6027e-03,
         3.1299e-03,  1.3760e-02, -1.5296e-02,  5.1176e-02,  3.0784e-02,
         7.8674e-02,  7.9691e-03,  3.1264e-03,  3.6311e-04, -2.4724e-02,
        -3.7522e-03, -2.6021e-02, -3.3070e-02, -3.9701e-02,  2.2048e-03,
        -7.8584e-02,  4.6151e-02,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1245e-01,  2.3349e+01, -8.2944e-02,  6.4560e-03,  1.7694e-01,
         1.6941e-02,  3.0381e-02, -6.6960e-02, -6.3497e-02,  2.6631e-02,
        -8.5465e-02, -8.1721e-03,  8.0294e-02,  5.9307e-02,  4.9779e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9388e-03,  8.1338e-01, -4.3304e-03,  2.0920e-04, -1.6534e-03,
        -1.1694e-03,  1.6845e-04,  6.6573e-04, -3.5654e-04, -2.5018e-04,
        -6.8226e-04,  2.7929e-04,  2.6370e-04,  1.9097e-03,  6.1098e-04,
        -4.6033e-04, -1.8176e-03,  1.1330e-03,  1.7712e-03,  1.6522e-04,
         4.4281e-04, -1.7388e-03,  9.0671e-04,  3.6089e-04,  4.1176e-04,
         5.3470e-04,  3.4528e-03,  6.6879e-03,  4.6388e-04,  1.2613e-04,
         3.1601e-03, -2.2506e-04,  1.0983e-03, -8.4243e-04,  1.4001e-03,
        -5.8143e-04,  6.9437e-05, -1.3826e-04, -4.0232e-03,  1.8101e-04,
         1.0853e-03, -3.9510e-04, -2.4628e-03, -9.4851e-04,  8.3215e-04,
         6.2611e-04, -9.1466e-04,  8.7989e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-2.3159e-02,  7.2588e-01, -7.2334e-03,  1.2662e-04,  5.1523e-03,
         1.0792e-03, -3.5791e-03, -1.8250e-04,  7.9886e-04, -7.1800e-03,
         7.2984e-04, -1.4194e-03, -8.4291e-04, -8.5790e-05, -1.1749e-04,
         6.9684e-05, -2.5363e-03, -2.0854e-03,  1.2460e-03,  1.1457e-03,
         4.7238e-03, -7.6226e-05,  1.9682e-03,  2.4840e-03,  2.0880e-03,
         3.0763e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1648e-01, -4.4105e+01,  1.1043e-01,  1.0846e-01, -1.0233e-01,
        -1.1208e-01, -7.2939e-02, -9.3436e-02, -6.3925e-02, -6.5411e-03,
         2.0055e-02, -2.4310e-02, -3.2507e-02, -1.1992e-01, -3.7349e-03,
        -3.6506e-02, -1.9455e-01,  7.2365e-02,  1.6577e-01,  7.1844e-02,
         3.0873e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7786e-03,  6.2953e-02, -1.6150e-04, -4.8883e-04, -1.6459e-04,
        -3.2179e-04, -4.4132e-05, -1.4857e-04, -1.0562e-04, -7.8563e-05,
         4.0420e-05, -1.5780e-05, -1.3386e-04, -1.1633e-04,  1.0524e-04,
        -2.1276e-05, -1.5856e-04, -5.9288e-05, -8.2037e-05, -8.2525e-05,
        -1.0840e-04, -5.3598e-05,  1.0775e-05, -8.1458e-05, -1.0924e-04,
        -7.0683e-05, -2.9480e-05, -2.9259e-05, -5.8081e-06, -3.1534e-05,
         1.5574e-05,  1.1246e-04,  3.2133e-05,  1.1118e-05, -2.5820e-04,
         1.9080e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9441e+00,  1.4540e+02,  1.8128e+00, -1.6451e-01, -6.2611e-01,
        -1.2772e+00,  1.4571e-01,  1.4066e-02, -9.8168e-01,  8.9728e-01,
         9.5380e-01, -4.2985e-02,  8.9643e-01, -2.2411e-01,  2.1069e-01,
         1.0009e-01, -5.3808e-02,  2.3440e-01,  8.1169e-01, -3.5166e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4541e+00,  1.5703e+02,  6.5257e-01,  7.3737e-01,  3.1284e-01,
        -4.2171e-01, -2.1486e-01,  9.3240e-01,  5.0005e-01,  6.7524e-01,
         2.1202e+00,  3.8581e-01, -1.5480e-01,  4.5278e-01,  1.0330e+00,
        -8.7588e-02,  3.5161e-01, -1.3745e-01,  4.7107e-01,  1.6659e-01,
        -1.2040e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([   2.5444, -304.8589,    4.2052,   -0.3838,    1.4929,    0.8817,
           0.3210,    0.4785,    0.7836,   -0.3674,    0.7991,    0.3255,
           0.4612,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000,    0.0000,    0.0000,    0.0000,    0.0000,    0.0000,
           0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1349e-01,  3.4468e+01,  1.3331e-02, -3.8240e-02, -1.6339e-03,
        -8.7150e-02, -2.1135e-01, -1.4842e-02, -8.3682e-02, -2.2393e-02,
        -1.0816e-01, -1.2401e-01, -1.0349e-01, -5.1936e-02,  1.1495e-02,
        -3.8467e-02, -2.7287e-02, -7.7886e-02, -4.5159e-02, -1.1272e-01,
        -7.4098e-02,  3.1919e-02, -1.6077e-01, -3.1039e-02, -7.6004e-03,
        -2.6986e-02, -5.8029e-02,  6.9332e-02, -1.1885e-02, -3.3756e-02,
        -1.3020e-02, -4.8780e-02, -3.1989e-02, -2.3731e-02, -4.2400e-02,
         2.9152e-02, -4.1349e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8763e-01, -3.3034e+01, -5.9608e-01,  3.9661e-02, -1.0321e-01,
        -2.9110e-02, -3.7073e-02, -6.5600e-02, -4.2463e-02,  1.3001e-01,
        -7.5437e-03, -6.7213e-02, -8.7827e-02, -1.3086e-01, -4.9858e-02,
         7.2483e-02,  3.1511e-02, -4.1208e-02, -1.9797e-02,  2.6082e-01,
         1.5654e-02,  7.5118e-02, -2.9464e-02,  2.1435e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2370e+00,  1.4360e+02,  2.4902e+00,  1.0338e+00,  3.4314e-02,
         3.1704e-01, -1.2438e-01, -1.0385e-01, -1.4788e-01,  5.5347e-01,
        -5.9451e-01, -9.1554e-01,  4.2275e-01,  1.4190e-01,  5.6750e-01,
         7.1380e-02,  4.0317e-01,  3.5697e-01,  1.8474e-02,  8.8266e-01,
        -1.8975e-02, -6.2434e-01,  3.6915e-01, -4.1586e-02,  1.7355e-01,
         7.1113e-02, -3.3413e-01,  3.0391e-01,  6.2677e-02,  1.1688e-01,
        -4.6660e-01, -2.0097e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8171e-03,  5.3950e+00, -6.2295e-02, -1.9767e-02, -6.1597e-03,
        -5.6008e-02, -7.9459e-03,  4.5938e-03,  1.8998e-03, -3.1542e-02,
        -2.9280e-03, -7.9446e-03,  5.9896e-03,  3.2207e-03, -1.4399e-02,
        -1.3079e-02,  4.6971e-03, -5.4115e-03,  1.2198e-03,  4.3952e-03,
        -5.5758e-03, -1.3694e-02, -1.5344e-02, -7.3999e-03, -9.8557e-03,
         7.7035e-03,  5.6135e-03, -9.8205e-03, -2.1333e-03, -8.9378e-04,
        -1.7498e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2726e-02,  6.7521e+00,  4.1312e-02,  7.7851e-04,  1.6590e-02,
         4.8807e-03,  3.2092e-02, -4.7706e-02, -2.0465e-02, -2.5149e-02,
         2.9446e-02,  2.1142e-02,  1.5643e-02, -4.4224e-02,  3.5101e-03,
        -1.6320e-02, -4.9953e-03,  2.9734e-02,  1.1603e-02,  4.1919e-02,
        -1.3776e-02, -3.8417e-02,  1.9499e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1836e+00, -5.7980e+01,  2.7454e-01,  1.0616e-01,  2.1160e-01,
        -9.1445e-02, -2.7993e-02,  2.7351e-02, -1.0526e-01, -1.1449e-01,
         4.3720e-03,  2.6847e-01, -5.2648e-02, -1.1612e-01, -1.3332e-01,
         1.5700e-03,  4.9316e-02,  2.0148e-01, -3.2088e-02,  2.7569e-03,
         8.2664e-02, -1.7343e-01, -1.0574e-01,  3.1496e-02, -6.9645e-02,
         3.4085e-02,  1.4953e-01, -1.7339e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 5.2443e-02,  8.0493e+00, -5.0495e-03, -3.6789e-03,  1.1645e-02,
         4.5160e-03, -5.5805e-03,  2.6928e-02,  6.2645e-03, -1.0936e-02,
         2.4920e-03, -4.3005e-04,  7.4000e-03, -8.8913e-03, -3.9283e-03,
         1.9879e-02,  6.3332e-03,  8.8323e-03,  1.3401e-02, -2.1367e-03,
         2.3083e-02, -1.4250e-03,  1.9698e-03,  5.6544e-03,  4.5018e-03,
        -9.9611e-05, -1.3912e-02,  8.0397e-02,  6.0264e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5643e-01,  1.6540e+02, -3.5004e-01,  2.5645e-02,  1.1567e+00,
         1.2353e+00, -2.1875e-01,  2.2435e-01,  2.6702e-01,  6.0574e-01,
        -2.8829e-01, -9.0321e-02,  2.4785e-01,  1.1404e-01, -1.8286e-01,
        -4.2075e-01, -2.8452e-01,  3.0521e-03,  3.3937e-01,  2.1057e-01,
         4.1924e-02,  9.5337e-02, -4.7983e-02,  1.5933e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1828e-04,  3.9240e-01, -6.1196e-03, -1.8178e-03,  2.0729e-03,
         1.5544e-03, -6.0002e-04, -8.1515e-04,  7.1503e-04,  9.6342e-05,
        -3.6325e-04,  3.4840e-04,  2.8546e-04, -1.1802e-05, -3.8849e-04,
         8.6405e-05,  2.5266e-04, -7.8455e-05,  2.2222e-05,  6.3191e-04,
         1.7183e-03, -3.6532e-05,  4.5402e-05,  1.1515e-03, -6.0643e-04,
        -2.1415e-04,  4.8979e-05,  1.9660e-04, -3.7082e-04,  7.7970e-04,
         5.5833e-04,  1.7082e-04, -7.4403e-04, -4.9948e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2258e+00, -2.1778e+02,  5.1701e-01,  1.5845e-01,  4.4159e-02,
        -1.1418e-01,  9.1097e-02, -3.6388e-01, -2.5742e-01, -2.0793e-01,
        -4.6940e-01,  1.3965e-01, -1.0510e-01,  3.4919e-01,  3.8958e-02,
         6.5179e-02,  6.8890e-01, -6.2801e-01,  1.0315e-01, -2.0372e-03,
        -1.1015e-01, -7.6345e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3244e+00,  8.8077e+01, -8.2983e-01,  5.0465e-01, -9.4980e-02,
        -5.8058e-01,  1.4429e-01,  4.4724e-01,  1.1052e-01,  2.1146e-01,
         4.8801e-01, -9.6501e-02,  1.6353e-01,  1.4197e-01,  1.2282e-01,
         3.3173e-02,  1.6808e-01, -3.5319e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5551e-03,  1.7564e+00,  9.8527e-03,  6.1349e-04,  5.5810e-04,
        -3.6693e-03,  2.6785e-04,  4.0379e-03, -1.7334e-03, -2.9648e-03,
        -7.0325e-04, -7.7364e-03, -7.0901e-03,  9.9771e-04, -3.6753e-03,
        -1.7248e-03, -1.2855e-03, -1.9357e-03,  2.9263e-03, -4.4835e-05,
         5.3375e-04, -1.2249e-03,  1.9798e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6310e+00,  1.2051e+02, -7.7442e-01, -4.1778e-01, -6.2649e-01,
         9.8307e-02, -1.0148e-01, -9.1012e-02, -3.1934e-01, -1.4868e-02,
        -5.9793e-01,  3.3199e-02, -2.7409e-01, -5.9420e-02, -4.8191e-02,
         1.6261e-02, -1.0770e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3462e-02,  2.8848e+00,  2.6775e-02, -5.6821e-03,  5.9066e-03,
         8.1039e-03,  1.0023e-02,  2.1499e-03,  2.4604e-02, -1.7054e-02,
         1.9250e-02,  9.9954e-04,  6.5235e-03,  1.9646e-02,  7.4970e-03,
         1.0960e-03,  7.0692e-03, -5.7843e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0573e-01,  1.5341e+02, -2.8503e+00, -7.1145e-02, -9.0579e-02,
        -2.8088e-02, -2.0299e-01, -2.3927e-01, -2.8952e-01,  6.2045e+00,
         2.3027e-01, -2.1322e-01,  5.5874e-01, -3.7818e-02,  1.1316e-01,
         2.2071e+00,  2.5175e-01,  1.0721e+00, -4.7229e-02,  3.6323e-02,
        -1.1172e-01, -1.2693e-01, -1.9822e-01, -6.4297e-02,  5.0112e-01,
        -2.5328e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9659e-02,  1.6601e+00,  2.9996e-03, -1.6012e-02, -1.8616e-02,
        -5.8157e-03, -6.7382e-03, -6.4233e-03, -1.6182e-03, -1.0560e-02,
        -9.7434e-04,  7.3717e-04, -1.6005e-03, -1.4402e-02, -1.8995e-03,
        -4.6335e-04, -3.2035e-03, -1.8412e-03,  1.4408e-03, -2.3388e-03,
         4.9338e-04,  1.7410e-03,  2.3700e-03, -4.0168e-03, -4.6110e-03,
        -2.0399e-03, -2.8333e-03, -1.6809e-03, -4.6596e-03,  4.7460e-03,
         4.0541e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5608e-02,  4.3338e+00,  3.1515e-02, -1.9866e-02, -8.6610e-03,
        -5.7015e-03,  6.3933e-03,  1.4779e-02, -9.0359e-03, -3.1918e-02,
         9.9377e-05, -1.0502e-02,  1.1431e-02, -5.9848e-03, -1.0273e-02,
         7.4289e-03,  6.7184e-03,  6.9162e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1198, 32.4522,  0.4201,  0.2770,  0.1038, -0.0667,  0.1138, -0.1556,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 4.2010e-04,  1.0401e-01, -1.3025e-04, -2.4078e-04, -1.4284e-04,
        -9.7323e-05, -3.7274e-04, -2.6062e-04,  3.2669e-04, -1.8810e-04,
         1.9523e-04, -1.7436e-04, -1.4830e-04, -2.4354e-04, -2.3412e-05,
         1.7981e-04, -1.7890e-04, -2.3834e-04,  1.0296e-05, -1.4015e-04,
        -4.5632e-05, -1.0546e-04, -3.7198e-04, -1.2390e-05,  3.5121e-05,
        -1.1519e-04, -1.3350e-04,  2.9102e-04, -1.0204e-04, -6.0797e-06,
        -4.5964e-04, -1.4430e-04,  3.7796e-05,  3.8806e-05, -7.7802e-05,
        -1.2442e-04,  9.8613e-05, -1.6841e-04,  6.6621e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8089e-04,  6.5337e-02,  5.1750e-05, -3.3777e-04, -7.6837e-05,
        -5.2555e-06, -1.9865e-07, -7.3822e-05, -1.5203e-04, -1.9193e-04,
        -1.2814e-04, -2.7558e-06,  5.5429e-05, -5.7776e-05,  2.4207e-05,
         3.7472e-05, -1.8971e-04, -1.3202e-04, -2.7333e-05, -1.8648e-04,
         2.6808e-04, -4.1735e-04,  8.2893e-05, -4.7696e-05,  1.8225e-05,
         9.5129e-05, -8.4695e-05,  4.0830e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7184e-01,  1.3860e+02,  3.2103e-01, -2.3741e-01, -8.5048e-02,
        -5.3902e-01, -4.0187e-01,  7.4994e-03,  5.8633e-01,  8.0508e-02,
        -3.3282e-02,  2.8558e-01, -3.6193e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1035e-01,  2.1632e+01, -3.8473e-02,  3.9934e-03, -6.8503e-02,
         7.6552e-02,  1.0424e-02,  2.2036e-02, -1.6414e-02,  6.6076e-02,
         5.2518e-02,  4.5498e-03, -3.6134e-02,  1.4396e-02,  2.1652e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5723e-02,  5.2466e+00, -5.9437e-02,  1.6988e-02,  1.6715e-02,
        -3.6178e-03, -6.7377e-03, -3.7331e-03,  2.7234e-04, -1.1262e-02,
         7.8758e-03, -2.1715e-03,  1.5732e-02,  9.8607e-03, -7.6667e-04,
         2.0996e-03,  3.3784e-04,  2.7125e-03, -6.0975e-03, -1.0353e-02,
         1.1198e-02, -2.3487e-03, -8.9714e-03, -1.6625e-03, -5.9730e-03,
         1.3781e-03,  4.9259e-03,  1.0306e-02, -7.7295e-04,  8.7901e-04,
         6.7913e-03,  5.5887e-04, -8.3594e-03,  6.4628e-03,  1.6417e-03,
        -1.6326e-03, -3.7291e-03,  1.5542e-03,  1.8167e-04,  2.0283e-03,
         1.2247e-02,  1.2777e-03, -1.3864e-03, -1.5346e-03, -8.3171e-03,
         7.9638e-03, -6.6533e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4705e-03,  9.5904e-01, -8.7472e-03, -5.5957e-03, -1.1515e-04,
        -6.9980e-04, -5.9494e-05, -5.0780e-03, -4.0760e-03,  8.2442e-04,
        -3.7939e-03,  5.1307e-04, -7.0504e-04,  3.1214e-04, -7.3736e-04,
        -1.7420e-03,  1.6704e-03,  4.2358e-04,  1.1126e-03, -9.3444e-04,
         7.2587e-05,  1.9174e-04,  8.3093e-04,  8.4851e-05,  6.3598e-04,
        -1.0547e-04,  1.2870e-03, -1.6975e-03, -9.4503e-05, -1.4591e-03,
         1.3243e-03, -6.9041e-04,  1.2009e-03,  3.3722e-05,  7.5606e-05,
        -1.8561e-04, -3.4390e-04, -2.8392e-04,  1.5188e-04,  1.0015e-03,
         9.7834e-05,  7.0716e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4411e-03,  8.8636e-01,  5.2422e-03, -2.8714e-03,  6.6096e-03,
        -2.0302e-03,  7.0901e-03, -1.4451e-03, -2.9749e-03,  2.4174e-03,
        -1.0119e-03, -4.0565e-03,  6.1773e-04,  1.1481e-03,  3.2097e-03,
        -8.8268e-03,  3.0254e-03,  2.0781e-04,  3.9170e-04,  8.2199e-04,
        -1.0685e-03,  6.2166e-04, -3.4178e-03,  5.9741e-04, -5.7920e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3341e-03,  3.5069e+00,  3.1647e-02, -1.1666e-02, -4.1671e-03,
         5.0695e-05,  1.1137e-02, -2.4170e-04,  2.8104e-03,  8.8471e-04,
         5.6766e-03, -5.6293e-03,  4.8961e-03, -1.4717e-02, -5.1145e-04,
         1.2473e-03, -2.0278e-03,  1.7046e-03, -4.5029e-03,  8.4977e-03,
         5.7896e-03,  9.6478e-03, -2.0260e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7757e-02,  1.1653e+00,  1.8852e-02,  2.4214e-03, -1.3537e-03,
        -2.9562e-03, -3.6232e-03, -1.9300e-03, -6.0754e-04, -6.0553e-03,
         1.4988e-03, -1.7674e-03,  1.8481e-03, -4.5558e-04, -4.2096e-04,
         2.4147e-04,  3.4945e-03,  4.8471e-04, -3.3098e-03,  3.1330e-03,
        -3.0697e-03,  1.9016e-03, -4.7283e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9562e-03,  2.9150e-01,  2.3345e-03,  1.4035e-03, -1.9533e-04,
        -7.3877e-05, -7.4313e-04,  6.4285e-05,  1.8075e-03, -6.1421e-04,
         1.1303e-03, -1.6693e-03, -5.2709e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8630e-02,  1.8610e+00,  1.0462e-02, -2.0543e-03,  5.3990e-04,
         5.0489e-03, -1.5890e-03,  4.9396e-03,  3.2573e-03,  8.2455e-03,
        -2.8789e-03, -3.5604e-03, -1.3328e-04, -6.9202e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7383e-01,  2.3811e+02,  4.1112e-01, -9.2076e-01, -7.4675e-01,
         4.4647e-02, -5.8717e-01, -2.8762e-01,  5.6765e-01,  5.9252e-01,
         5.3944e-02,  9.7822e-02, -9.5995e-01,  2.7725e-01,  2.1071e-01,
        -1.5415e-01, -9.8918e-01,  6.3398e-01,  1.4384e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-3.0670e-02,  4.3673e+00, -7.9418e-03, -1.6746e-03, -6.7854e-03,
        -2.7495e-03,  8.9021e-03, -2.7007e-03, -3.6158e-03,  8.4858e-03,
         6.3831e-03, -8.2754e-03, -2.7244e-03,  1.2380e-02, -1.1085e-02,
         9.3568e-03,  7.4379e-03,  1.3355e-03,  1.4991e-03,  1.7119e-02,
         4.1056e-03,  7.7370e-03, -1.8971e-02,  5.0787e-03,  1.1685e-03,
         1.1170e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7814e-03,  1.7351e-01, -3.9867e-04, -2.0260e-03,  7.6562e-04,
        -1.5761e-03, -7.8193e-04, -2.1565e-03, -4.8895e-04,  1.2740e-03,
         3.9691e-04, -6.5908e-04,  2.5389e-04,  1.4336e-04,  8.7562e-04,
         1.1899e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9860e-04,  1.4920e-01, -4.5653e-04,  5.3586e-04, -9.7801e-05,
         5.6662e-04, -5.0347e-04, -6.3741e-05, -2.9990e-05,  1.9912e-04,
         1.9789e-05, -8.9353e-04, -1.5907e-04,  2.9465e-05, -3.2235e-04,
        -1.0407e-03,  1.3128e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0961,  9.7598, -0.0550, -0.0254,  0.0489,  0.0384,  0.0683,  0.0523,
         0.1042,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3490e-03,  1.1255e+00,  4.0335e-03,  3.2027e-03, -2.0737e-03,
         2.0180e-03,  5.8649e-05,  7.5843e-04,  1.3434e-03, -1.6772e-03,
         2.7545e-03,  6.5364e-03,  1.5618e-03, -1.2056e-03, -9.2956e-04,
         1.0566e-03,  1.4576e-03,  2.3768e-03,  1.5695e-03,  3.4977e-04,
         4.8039e-04,  6.3375e-03, -4.7734e-04,  6.7579e-05,  2.0983e-04,
        -6.5499e-03, -9.1411e-04, -1.6787e-03, -5.4842e-04,  7.4500e-04,
        -1.1120e-03, -2.7960e-04,  3.4816e-04,  2.9723e-05, -6.8562e-05,
        -5.8730e-04, -2.3665e-03, -4.0280e-04, -1.7565e-03, -1.9831e-03,
         1.3917e-03,  1.7576e-03, -3.1942e-03,  1.4023e-03, -1.1888e-03,
        -4.2232e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5305e-03,  3.7820e-01,  8.0572e-04,  2.9868e-04,  2.0320e-04,
         4.1783e-04,  9.5628e-04, -5.9933e-05, -2.1085e-04,  4.0754e-04,
         3.9676e-04, -4.1451e-04,  2.5566e-03, -2.9173e-04,  1.1465e-04,
         9.0818e-04,  3.1686e-04,  6.5371e-04, -1.7243e-04,  1.5007e-03,
         4.4598e-04, -1.8932e-04,  7.6967e-05, -7.1510e-04,  4.2242e-05,
        -6.5179e-04, -3.4465e-04,  8.6466e-04,  6.2539e-04,  4.5047e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0601, 10.0260,  0.0245, -0.1397, -0.1114, -0.1121, -0.1160,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1860e+00,  1.1705e+02,  1.7314e-01, -6.3009e-01, -2.9457e-01,
        -1.7169e-01, -7.2704e-02,  5.7543e-01,  3.3777e-01, -9.8699e-02,
        -4.7549e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3176e-04,  5.8365e-01,  3.0718e-03,  1.3773e-02,  4.2803e-03,
        -5.3084e-04,  1.4617e-03,  2.3705e-03,  7.2678e-03, -1.4373e-03,
        -2.2412e-03,  5.4465e-04,  1.0411e-03, -9.3071e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0290e-03,  1.7455e-01, -2.5166e-04, -3.4696e-04, -2.4477e-04,
        -1.0453e-04, -1.3953e-04, -3.0858e-04, -1.0701e-05, -5.6455e-06,
        -1.9502e-04, -1.6784e-04, -1.0957e-04, -2.2249e-03, -3.9119e-05,
         2.2372e-04, -2.3132e-05,  1.5929e-04, -3.2560e-04,  5.8963e-04,
        -5.4542e-05,  7.7398e-06, -2.0139e-04, -1.1612e-04, -2.4583e-04,
         1.9396e-04,  4.1352e-04,  7.4454e-05, -1.9626e-04,  1.4504e-04,
        -1.3031e-04, -4.6343e-04, -4.2503e-04,  6.7726e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6456e-03,  1.5261e-01,  1.1610e-03,  7.6587e-04,  8.4028e-04,
         2.1953e-04,  1.8317e-04, -1.9449e-04,  5.7629e-05,  4.8045e-04,
        -1.3317e-04,  1.8038e-04, -1.9280e-04,  1.6851e-04,  1.1652e-03,
        -5.9897e-04, -1.8046e-03,  4.1414e-05, -3.0874e-04,  2.9325e-04,
        -4.9284e-04,  2.6820e-06,  3.0602e-04, -1.7171e-04, -3.0477e-04,
         1.4040e-05, -4.1956e-05, -3.7126e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1840e-04,  1.2587e-01, -8.2918e-04, -3.0461e-04,  1.1375e-04,
        -3.7707e-04, -9.3622e-05, -1.8241e-04, -1.4844e-04, -1.0625e-04,
        -3.6838e-04, -4.0159e-04, -3.2933e-04,  1.9022e-04, -3.3971e-04,
        -6.0976e-05, -2.1858e-04,  3.8275e-05,  1.6442e-04,  2.0901e-05,
        -7.3966e-05,  4.7874e-05, -3.9904e-04,  6.2015e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.0094e-02,  2.8280e+00,  1.4031e-02, -3.3940e-03,  1.2360e-02,
         5.3322e-03, -5.1735e-03, -2.8190e-03,  1.6010e-03, -2.6929e-03,
        -1.3385e-03,  8.4005e-03,  2.7727e-04,  1.4974e-03,  1.0668e-03,
         5.9311e-03,  5.1911e-03,  1.7075e-03,  3.0841e-03, -2.9862e-02,
         2.7239e-03, -1.3159e-03,  7.8446e-03,  5.4543e-03, -7.5941e-03,
        -1.6494e-03, -6.5143e-03, -4.2069e-03, -3.0384e-03,  2.3927e-04,
         8.9994e-03,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7172e-02,  1.3415e+00,  5.3965e-04, -2.9665e-03, -4.9667e-03,
         1.1751e-03, -3.7617e-03, -2.8268e-03, -1.1818e-03, -1.2539e-05,
        -7.5084e-04, -2.0295e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0349e-04,  1.5646e-01,  6.8517e-04,  1.2081e-04,  6.9759e-04,
         5.5508e-04, -4.2245e-04, -3.3083e-04,  2.5593e-04,  1.1468e-04,
        -2.4481e-04,  1.0850e-03,  8.3462e-05,  1.7334e-05,  1.2112e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3491e-01,  6.3650e+01, -1.7950e-01, -4.8133e-02,  9.6441e-02,
         8.1346e-02,  1.3747e-01,  6.6913e-02,  1.4288e-01,  6.3418e-02,
        -2.4323e-02,  1.5079e-01,  1.9525e-01,  1.5081e-01,  1.4120e-01,
        -3.6698e-02,  8.0401e-02, -8.5879e-02, -1.3468e-01, -4.1420e-02,
         5.3339e-02,  3.3582e-02, -5.5991e-03,  3.8952e-02,  1.1291e-02,
         1.0531e-01,  8.8455e-02,  2.0371e-02,  1.8853e-02, -2.2355e-02,
         1.6663e-01, -4.5715e-01, -2.2668e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6240e-03,  4.9901e-01, -7.0428e-03, -9.2411e-04, -8.5420e-05,
        -6.4671e-04,  3.3933e-04, -2.9063e-03, -1.7145e-03, -4.9011e-04,
        -1.8180e-03, -1.4681e-03,  3.6531e-04,  2.6998e-04,  1.8787e-03,
         2.9726e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0076,  0.6595, -0.0077, -0.0042,  0.0031, -0.0018, -0.0010, -0.0030,
        -0.0027,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0054,  0.2456, -0.0112, -0.0060,  0.0038,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3246e-02,  9.6346e+00,  1.2171e-01,  3.2783e-02,  9.0787e-03,
        -1.5424e-02,  1.5245e-02,  3.5496e-02, -9.0569e-03,  4.8948e-02,
         4.8830e-02, -4.4424e-03,  8.9975e-03,  1.1502e-02,  1.6188e-02,
         1.0470e-02,  1.5517e-03, -4.9634e-04,  1.6344e-02,  4.9000e-03,
         7.3721e-03,  1.4034e-02,  1.0135e-03, -2.4663e-02,  5.4818e-03,
        -1.3664e-02,  5.0226e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0362,  1.2488,  0.0022,  0.0191,  0.0071,  0.0157,  0.0018,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2774e-02,  8.1249e+00,  2.3444e-02,  3.5987e-02, -5.1383e-03,
         2.6819e-02, -3.0771e-02, -1.0424e-02, -5.9637e-03,  2.2858e-02,
        -9.5335e-03, -8.5996e-03, -1.3724e-02,  3.1045e-02, -8.2817e-03,
        -1.3137e-02,  1.4129e-02, -8.1113e-04,  3.7652e-03, -1.0504e-02,
         2.0034e-02,  5.0077e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8777e-04,  1.5611e+00,  3.9532e-03,  2.1506e-03,  5.4314e-03,
         1.1669e-03, -2.7963e-03, -4.0375e-03,  2.2836e-03, -4.8618e-03,
        -3.3099e-03, -1.5173e-03, -6.0727e-03,  1.9780e-03, -1.8273e-03,
         2.8384e-03, -4.8237e-04, -1.5588e-03, -4.5644e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5998e-04,  1.5369e-01, -1.0490e-03, -4.4853e-04,  3.3537e-04,
         1.3644e-04,  1.6350e-04, -8.3799e-05,  4.8655e-05, -2.6898e-04,
        -6.7643e-05,  2.5059e-04,  9.5776e-05, -1.0363e-04,  1.8327e-04,
        -3.2928e-05,  2.7087e-04,  1.1273e-04,  6.2970e-04,  1.7168e-04,
         1.8868e-04,  5.9200e-04,  8.0144e-05,  8.1668e-05, -7.0984e-05,
         3.1135e-05,  2.7152e-05,  7.9464e-05,  4.2775e-05, -5.7850e-05,
         2.5696e-04,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.5589e-03,  4.0412e-01,  4.5623e-03,  1.2705e-03,  5.2450e-04,
        -3.1429e-04,  6.7419e-04,  1.1514e-04,  6.6008e-04,  7.2501e-04,
         1.9936e-04,  6.3769e-04,  2.8395e-04,  3.3510e-03,  2.4395e-04,
         3.4260e-04,  4.7545e-04,  5.0484e-04,  1.5268e-03,  1.8812e-04,
         5.5968e-04,  8.9899e-04,  5.1862e-04,  1.8991e-03, -6.8813e-04,
        -7.4264e-04,  1.1820e-04, -3.4902e-04, -1.3449e-04,  1.0705e-03,
         1.8336e-04,  2.6087e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2941e-01,  2.4247e+01,  9.7677e-02,  2.8577e-03,  3.0738e-02,
        -1.7377e-02,  6.7483e-03, -1.5136e-02,  1.3027e-01,  3.7141e-02,
        -1.6925e-03,  1.5632e-01,  4.8426e-02,  2.8826e-02,  3.3344e-02,
        -2.5470e-02,  5.1610e-02, -2.0492e-02, -2.1414e-03, -1.2487e-02,
        -2.2800e-02,  7.0715e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6976e-04,  5.5611e-01, -3.8597e-03, -1.4857e-03,  2.5923e-04,
         1.2077e-03, -3.5938e-04, -5.6603e-05,  3.7983e-04,  2.2459e-04,
         2.8204e-04,  5.7523e-04, -2.6505e-04, -5.3635e-04, -2.5974e-03,
         3.3118e-04, -4.5262e-04,  3.0267e-03, -1.6041e-03,  1.9519e-04,
         1.2128e-03,  3.7814e-04, -5.0862e-05, -4.4304e-04, -4.3379e-04,
         3.2672e-04,  3.8315e-04, -1.9125e-03,  2.2952e-04, -6.6360e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5651e-04,  3.1254e-01,  2.0023e-03, -7.8851e-05, -2.2539e-04,
        -9.9574e-04,  8.4701e-04, -6.2715e-04,  8.4148e-04, -2.1062e-03,
        -3.1222e-04, -3.0820e-05, -5.4207e-04,  2.2113e-04, -4.9329e-04,
        -1.1396e-03, -1.4293e-03, -1.1592e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1051e-04,  4.2114e-01,  1.1004e-02, -9.7995e-05,  2.3890e-03,
        -1.6596e-03, -6.4432e-04, -4.7125e-04, -6.6587e-04,  4.6854e-05,
         3.4859e-05, -9.2954e-04, -3.1223e-04, -1.3015e-04, -4.9310e-04,
        -1.9805e-04,  3.1766e-05, -1.4824e-04, -1.1240e-03, -4.9874e-04,
         5.4505e-04, -1.7023e-03,  3.5376e-04,  7.3672e-04,  2.6338e-05,
        -8.2727e-05, -1.3132e-04, -8.1293e-04, -1.7573e-04, -1.0859e-03,
         2.6235e-04, -9.3290e-05,  8.0204e-04, -9.0387e-05, -5.0252e-04,
        -6.5967e-04, -1.3266e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9850e-01,  5.4647e+01, -1.3768e-01,  1.3070e-01,  1.8918e-01,
         1.7120e-01,  2.8314e-01, -3.9375e-01,  8.8485e-02,  5.1033e-02,
         2.3547e-02, -2.4516e-01, -1.4736e-01,  1.2723e-01,  2.2305e-01,
         1.0030e-01, -2.0733e-02,  2.3996e-02, -8.4516e-03,  1.0174e-01,
         1.0884e-01,  3.2379e-02,  9.4088e-02, -3.6448e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1191e-04,  3.0825e-01, -3.7427e-04, -6.3446e-06, -1.4712e-04,
         3.3560e-05, -4.2048e-04,  3.6478e-04, -1.1273e-04, -2.1396e-04,
        -3.8524e-04, -1.5943e-04, -4.7677e-04,  2.0332e-04, -2.5653e-04,
         2.3859e-04, -2.2478e-04, -4.9072e-04,  3.3319e-04,  5.2414e-04,
         3.1294e-04, -4.3437e-04,  1.3923e-04, -4.1842e-04, -1.6929e-04,
         7.0161e-04,  3.6038e-05, -3.2333e-04,  1.2238e-04, -8.6472e-05,
         5.9439e-04,  4.2808e-05,  2.2479e-05,  4.3068e-04,  4.9935e-04,
         6.0222e-04,  3.0805e-04,  8.0655e-05, -6.3070e-05, -2.0526e-04,
         2.4083e-04,  8.5021e-04,  6.5911e-04,  1.2605e-03,  3.2765e-04,
        -9.1479e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4456e-02,  1.7024e+00, -5.9111e-03, -2.9435e-03,  4.4631e-03,
        -1.4790e-03,  2.2638e-03,  6.1745e-04,  3.2540e-03, -5.4075e-03,
         2.1529e-03,  3.8279e-04, -3.2774e-03, -8.0950e-03,  4.4935e-03,
         4.0699e-04, -1.7424e-03,  2.5209e-04, -2.6688e-03, -2.0181e-03,
         2.1341e-03,  3.9913e-03, -7.3711e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4729e-02,  3.8375e+00, -6.8613e-03,  1.7527e-02, -2.7757e-03,
        -2.8578e-03,  1.2878e-02,  2.4046e-02,  2.6798e-02, -6.6932e-05,
        -5.9180e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1260e-02,  2.2965e+00, -1.5655e-02,  1.1665e-03, -2.1213e-03,
        -7.1113e-03, -3.1853e-03, -1.5716e-03,  3.5201e-03, -3.7414e-03,
         2.0282e-03, -6.4718e-03, -7.6176e-03, -1.2974e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4122, 17.5519, -0.1485, -0.0460, -0.0790, -0.0469, -0.1876, -0.0995,
        -0.1047, -0.4380,  0.0289, -0.0911,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4272e-02,  3.9716e+00, -1.5520e-02,  8.2073e-03,  5.2706e-03,
         1.7010e-02,  5.0173e-03,  3.9769e-03,  4.8351e-03,  1.7661e-02,
         7.2953e-03,  2.3823e-03,  4.1954e-03,  1.6314e-03,  2.1363e-03,
         1.1778e-03,  5.0056e-03,  5.7996e-03,  9.9149e-03,  2.3895e-03,
        -6.0143e-04,  2.9548e-03,  6.4345e-03, -2.5206e-03,  1.8631e-03,
        -7.7406e-03,  4.7365e-03, -2.2663e-03,  1.5650e-03, -3.7931e-03,
        -6.3899e-04,  8.5316e-03, -4.1269e-03, -1.9115e-03,  2.7651e-03,
        -2.5177e-03, -7.8587e-04,  4.9219e-03,  1.1710e-03, -1.7554e-03,
         3.1268e-03,  6.4209e-03, -1.4934e-03,  3.7050e-03,  9.5114e-04,
         7.7958e-04,  2.1863e-03, -9.9107e-05,  3.3322e-03,  3.7593e-03,
        -5.5148e-04,  1.6717e-03, -1.1014e-03,  2.3995e-03,  3.0037e-03],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 2.2062e-01,  5.0306e+01, -7.3983e-02, -1.7211e-01,  3.6943e-03,
        -2.1705e-02, -7.6503e-02,  3.5410e-03,  1.8412e-01, -1.4410e-01,
        -1.0178e-01, -1.8563e-02,  5.4526e-02, -1.8030e-01,  5.0498e-02,
         1.5224e-01, -9.8369e-02,  2.7003e-02,  4.3188e-02,  1.9251e-02,
        -1.5486e-02,  2.2847e-03, -1.0357e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7576e-02,  8.5938e+00, -2.3808e-02,  1.6043e-04,  1.1196e-02,
        -8.2495e-04, -1.2232e-02, -2.4589e-03, -5.8953e-02, -1.1780e-02,
        -1.2540e-02,  7.3157e-03,  1.3997e-02,  1.1683e-02,  1.9237e-02,
         2.2770e-03,  9.2988e-02, -1.6546e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0911e-02,  5.2330e+00, -5.3807e-02,  5.8932e-03,  2.3156e-02,
        -1.2439e-02, -2.2848e-02,  1.8572e-02,  2.2314e-02,  7.9913e-03,
         2.3068e-02,  8.5963e-03, -1.3322e-03, -4.0457e-03,  3.2890e-02,
        -1.0359e-02,  1.6144e-02, -2.5504e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4748e-03,  5.0822e-01,  1.6995e-03, -1.7330e-04, -1.8405e-03,
         9.5857e-03,  1.9980e-03, -2.4489e-03,  2.1145e-03, -7.5318e-04,
         1.1092e-03,  1.0823e-03,  1.9333e-03, -2.7665e-03, -3.2181e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8425e-05,  5.9057e-02, -1.8183e-04, -2.7390e-04, -8.9242e-05,
        -6.0941e-05, -1.2721e-04, -9.8578e-05, -1.5566e-04,  1.2954e-05,
        -2.3637e-05, -1.5715e-04,  1.0027e-04, -5.1067e-05, -1.7279e-04,
        -6.3260e-05,  3.0165e-04, -3.3763e-05,  7.5023e-05,  2.7707e-04,
        -1.8820e-05,  2.8146e-06,  1.0864e-05,  4.5653e-05, -1.6029e-05,
        -2.7084e-05,  3.6730e-05, -1.5346e-05, -1.8519e-04, -4.5980e-06,
         7.4538e-05,  1.9803e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9926e-01,  1.0596e+02,  3.5547e-01,  4.1365e-01,  7.1459e-01,
        -3.6475e-02,  1.3929e-01,  2.4427e-01, -2.5997e-02, -1.3164e-01,
         1.6480e-02, -1.2451e-01, -4.5070e-01,  9.9544e-02,  3.0209e-04,
         4.6471e-04, -1.7975e-01, -7.2519e-02,  1.9590e-01,  9.3840e-02,
         2.3080e-01, -3.2431e-01, -2.9903e-01, -7.2869e-03,  1.3183e-01,
         2.7751e-02,  1.7018e-01,  1.9725e-01,  2.2962e-02, -1.3132e-01,
         1.0681e-01,  1.2909e-01,  1.2190e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0485e-02,  1.1012e+01, -5.9332e-03,  4.9500e-02,  3.8384e-02,
        -1.4100e-03, -8.6496e-03,  2.3270e-02,  2.3371e-02,  6.7653e-03,
        -2.8964e-03, -1.1109e-02, -8.7512e-03, -1.2393e-02, -1.6153e-02,
        -1.4960e-02,  8.6514e-03,  2.5621e-03,  7.4962e-03, -6.2791e-03,
         3.4651e-03, -6.0296e-03,  1.7420e-03, -1.6341e-02, -1.4088e-03,
        -3.1613e-04, -4.9165e-03, -6.6801e-03, -1.9148e-02,  9.4122e-03,
        -6.8801e-04,  4.7705e-03,  2.8593e-03, -1.0110e-02,  3.8025e-03,
        -1.1562e-02,  1.4640e-03,  9.0595e-03,  4.9536e-03, -6.4774e-03,
         1.6660e-03, -8.6544e-03,  6.4740e-03, -1.6439e-03,  8.4197e-03,
         3.0219e-03, -8.4347e-04,  9.4405e-03, -1.9171e-03,  2.3893e-02,
         6.1476e-04, -3.1542e-04,  5.6550e-04,  1.7014e-03, -4.0043e-03,
         9.0159e-03,  4.7185e-03, -1.0560e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8716e-01, -1.0032e+02, -5.7587e-01, -9.1149e-02,  2.0930e-01,
         1.5257e-01,  2.0861e-01, -6.2620e-02, -7.7046e-02,  8.2941e-02,
        -8.8392e-02,  1.9603e-01, -1.1642e-01,  7.8899e-02, -7.8772e-01,
        -6.0650e-02, -9.8036e-02, -1.2176e-01, -6.9341e-02, -2.0320e-01,
         1.3537e-01, -8.0683e-02,  4.3526e-02, -4.7822e-02,  1.4818e-01,
         1.9386e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0436e-03,  6.4926e-01, -8.5291e-04,  3.7576e-03, -1.4147e-03,
         4.0476e-03, -8.2033e-04,  2.9818e-03,  6.2467e-04,  1.9630e-03,
        -5.5365e-05, -1.6031e-03, -4.0554e-04,  1.8635e-03,  2.3893e-04,
         1.1202e-03,  1.6940e-03,  8.3682e-04,  1.7341e-03, -1.4661e-04,
         1.6540e-03, -2.6037e-03,  6.5941e-04,  2.7449e-03, -2.4168e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4210e-02,  3.7025e+00, -1.7836e-02,  5.1837e-03,  1.7563e-02,
        -4.8704e-03, -2.8532e-03, -6.5114e-03,  2.7841e-03, -7.2929e-04,
         1.4778e-03, -2.5177e-03, -1.3058e-02, -3.6573e-03, -1.6292e-03,
         5.9611e-04, -3.8863e-03, -3.9542e-03, -2.5319e-03,  1.1537e-02,
         2.2066e-03, -1.6300e-03,  2.6403e-03,  1.1533e-03,  2.8823e-03,
        -1.1944e-02, -2.1095e-03, -5.4608e-03, -9.9996e-04, -1.2677e-03,
         5.5129e-03, -2.8725e-03, -5.6992e-03, -3.8952e-03,  1.9872e-03,
        -3.5155e-03, -3.8707e-03, -1.9484e-03, -3.0424e-03,  1.2791e-03,
        -3.7451e-03,  3.1274e-03, -1.5361e-03,  3.7182e-03,  6.9516e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9000e-04,  1.1071e+00, -6.1480e-03,  1.7737e-03,  1.0065e-03,
         9.9866e-04, -8.1878e-04,  1.3664e-05, -1.2174e-03, -3.5907e-03,
        -4.0165e-03,  1.7347e-03, -2.5975e-03,  1.2361e-03, -2.3454e-03,
        -1.5344e-03,  1.2260e-02,  6.1112e-04, -5.6475e-04, -1.3940e-03,
         4.3386e-03, -6.8437e-04, -9.7058e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8819e-01, -1.8607e+02, -1.9744e-01, -9.5081e-01,  5.4406e-01,
        -2.0882e-01,  1.5208e+00,  7.8992e-02, -2.7097e-01, -1.5832e+00,
        -4.3749e-02, -1.7850e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 3.0014e-02,  6.0343e+00,  8.7515e-02, -2.9728e-02,  3.7096e-03,
         1.2455e-02,  1.2909e-02, -1.1592e-03,  5.8790e-03, -2.7071e-02,
        -3.3220e-03, -1.0487e-02,  1.4631e-02, -7.8428e-03, -7.2660e-03,
         6.9033e-03,  2.2529e-03,  1.5032e-02,  1.0346e-03, -1.9087e-03,
        -1.4823e-02,  2.5577e-03, -1.0318e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1582e-02,  5.6244e+00,  1.7350e-02,  7.9490e-03, -2.1543e-03,
        -9.3341e-03,  2.4084e-02,  8.5395e-03, -1.5262e-03,  1.2205e-02,
         4.5643e-03, -2.7971e-03,  2.7906e-03,  3.0754e-03,  4.5516e-03,
        -9.1262e-03,  2.4276e-03, -3.1362e-03, -6.9073e-03, -2.3445e-03,
         3.0952e-03,  3.4420e-03, -1.2833e-02, -5.0956e-03,  3.1285e-04,
        -4.3700e-03, -1.0800e-02,  5.5801e-05, -7.4647e-03, -5.1050e-03,
         5.8005e-03, -1.7217e-03, -1.1296e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7775e-02,  2.2359e+00, -1.9573e-02,  1.2848e-04,  8.3854e-03,
        -7.5035e-03,  3.0331e-03, -2.4580e-03,  2.2264e-03,  1.3561e-04,
         2.8872e-03, -9.6218e-04,  7.8227e-03,  4.5149e-03,  9.0962e-04,
         1.3855e-03,  5.3053e-04, -5.8224e-03, -2.1766e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0993e-03,  3.7226e-01,  4.9164e-03, -2.9960e-04, -7.2284e-05,
         1.8921e-03,  7.6891e-04, -1.3520e-04,  7.4999e-05, -3.4857e-04,
        -1.5525e-03,  1.2219e-04, -2.6089e-03, -9.4414e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3266e+00,  2.3869e+02,  5.4269e-01, -1.6220e+00, -2.0681e-01,
        -2.6609e-01, -2.1339e-01, -3.9938e-01, -1.3221e-01,  2.4857e-01,
        -4.0125e-01, -6.7169e-02, -2.5262e-01,  7.3012e-01,  8.7373e-02,
        -3.0077e-01, -2.3665e-01,  2.1169e-02, -1.5816e-01, -1.5120e-01,
         4.6213e-02, -1.1089e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4629e-01,  6.1622e+01, -3.2169e-01,  8.3162e-02, -8.5166e-02,
        -2.1177e-01, -1.6399e-01, -8.0792e-02,  3.3924e-02,  1.1200e-01,
        -1.6163e-01,  6.6184e-02, -2.8907e-02,  6.2859e-02, -7.5866e-03,
         1.0451e-01, -8.7882e-02, -1.2129e-01, -2.9840e-01, -5.1857e-02,
        -1.2299e-01,  1.5844e-02, -5.1791e-02,  1.2375e-02, -1.1506e-01,
        -5.4934e-02, -1.2117e-01, -6.8246e-02,  1.5646e-01, -1.8140e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8888e-02,  1.9374e+01,  2.8736e-02, -9.7850e-02, -7.5826e-02,
         7.2244e-02,  6.8819e-02,  8.5091e-02,  1.2030e-02,  2.4450e-02,
         6.2371e-02,  2.3700e-02, -2.5196e-02,  5.8602e-02, -6.0500e-02,
         2.9855e-02,  1.0566e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9567e-03,  4.0748e-01,  1.6921e-04, -2.6777e-03, -1.9107e-03,
        -2.0591e-04, -1.6200e-03, -2.6696e-03, -3.9530e-03, -1.4796e-03,
        -2.9635e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3600e-01,  2.8842e+01, -8.2471e-02, -1.0532e-01,  5.9678e-02,
         3.1243e-02, -3.7495e-02,  2.0148e-02,  1.0089e-02,  4.8316e-02,
        -4.7908e-02,  8.6253e-02,  1.8510e-02,  8.3029e-02,  2.5930e-02,
        -6.7321e-02, -4.8350e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9815e-03,  9.5514e-01,  5.7736e-03,  3.8279e-03, -4.6182e-04,
        -3.8941e-04, -1.3041e-03,  5.6628e-04, -2.7380e-04,  8.3529e-04,
         3.2288e-03, -1.5223e-03,  2.5715e-03, -2.3258e-03,  2.3787e-03,
         2.5023e-05,  3.0420e-04, -5.8543e-04, -1.6723e-03,  2.6215e-03,
        -1.1667e-03,  2.1162e-04,  3.2885e-03,  1.1373e-03,  5.9644e-03,
        -3.4008e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2870e-02,  9.2152e-01,  2.4082e-02,  2.3553e-03,  5.5179e-04,
        -4.8699e-03,  3.5946e-03,  1.9130e-03, -2.1598e-03, -1.4147e-03,
        -8.6978e-04,  2.8464e-03, -4.0828e-04,  7.0424e-04, -9.6306e-04,
         9.5808e-04, -1.3344e-03,  8.3911e-04, -9.2640e-04, -2.9308e-03,
        -7.9242e-04, -5.0680e-04, -4.0624e-05,  3.7114e-04, -2.3243e-03,
        -8.2586e-04,  1.2769e-04, -1.9255e-03, -2.5581e-03,  1.2430e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4765e-01,  3.6608e+01, -1.9700e-01,  1.1352e-01,  2.7903e-02,
        -7.1224e-02,  2.1672e-01,  1.1628e-01, -2.1132e-02,  4.1312e-02,
        -2.1379e-02,  4.0265e-02,  9.8189e-02,  1.3930e-01, -8.1618e-02,
        -8.2863e-02,  6.4465e-03,  6.0264e-02, -1.2777e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 2.1638e-04,  1.1041e-01,  3.7538e-04,  6.8071e-04,  1.0437e-04,
        -2.6251e-04,  3.9240e-04,  2.2611e-04,  1.4692e-05,  6.2629e-04,
         9.6826e-04,  2.2377e-05,  1.5871e-04,  2.2615e-04,  2.6908e-04,
        -7.3289e-05,  2.1498e-04,  1.2414e-04,  1.5072e-04,  3.2526e-04,
         1.5206e-05,  9.6024e-05,  3.7941e-05, -1.4153e-04, -3.9509e-05,
        -1.8710e-04, -1.8826e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6481e+00,  1.2486e+02, -6.3020e-01, -5.2052e-02, -2.6547e-01,
        -3.0524e-01, -1.6915e-01, -1.8792e-01,  2.8186e-01,  3.4504e-01,
        -2.5262e-01,  5.9672e-01,  1.0773e-01,  8.6655e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9942e-03,  1.2413e+00,  2.3645e-02,  1.1624e-04, -1.7375e-03,
         4.3263e-03, -2.6157e-03,  8.3096e-03, -7.4059e-04, -3.4419e-04,
         2.7534e-03, -1.3478e-03,  7.1588e-04,  2.7942e-03, -1.0742e-03,
         3.2728e-04, -3.0245e-03, -1.6413e-03,  1.3763e-03, -2.6921e-03,
        -2.8494e-03, -2.9084e-03,  1.8220e-03, -1.0204e-03,  1.2706e-03,
        -5.7009e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0802e-02,  3.4300e+00,  1.8638e-02, -1.3568e-02, -5.6876e-03,
        -2.2177e-02, -2.7809e-03,  4.7119e-03,  6.8361e-03,  1.8490e-02,
         2.9464e-03, -5.4989e-03,  4.4287e-03,  5.6797e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5241e-01,  7.6863e+01, -5.4957e-01, -1.6887e-01,  5.6435e-02,
        -2.5493e-02, -2.0851e-02,  8.0059e-02,  1.3507e-02, -1.4421e-02,
         2.0810e-02,  3.9313e-02,  1.2594e-01,  4.2825e-03,  2.2039e-01,
        -7.6439e-02,  8.4598e-02,  4.3874e-03, -7.9618e-02,  1.8638e-01,
        -6.1174e-02, -6.2268e-02, -4.7959e-02,  9.0047e-03,  1.4085e-01,
        -2.5376e-01, -3.7350e-01,  1.0883e-01, -2.0006e-02, -1.8050e-03,
        -1.0667e-01,  9.9332e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5694e-02,  2.4794e+01,  2.4576e-01, -1.3652e-01,  7.6008e-02,
        -1.8851e-02, -3.7729e-02, -3.5444e-02,  5.5561e-02, -2.3149e-02,
         2.9467e-02, -1.0395e-02,  6.2295e-02, -1.0325e-02,  5.3228e-03,
         2.8157e-02,  3.9242e-02,  2.3015e-02,  1.3319e-02,  6.3600e-03,
         3.6186e-02,  7.7777e-03, -1.0868e-03,  1.0978e-01, -1.3805e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0213, 7.4675, 0.0076, 0.0446, 0.0267, 0.0195, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3389e-03,  7.4875e-01, -6.6198e-04, -1.9210e-03,  7.6753e-04,
        -7.4333e-04, -1.9348e-03, -2.1156e-03, -1.5024e-04,  8.1743e-04,
         3.9836e-05, -1.3351e-03,  3.0685e-04,  5.1522e-04,  5.3128e-04,
        -8.7867e-05,  5.1258e-04, -1.8896e-03, -1.4766e-03,  1.4913e-03,
        -8.8779e-04,  1.1020e-04, -5.3882e-04,  2.6882e-04, -5.5538e-05,
         2.4678e-03, -1.5567e-04,  7.1377e-06, -7.3156e-04,  1.1890e-03,
        -1.5430e-04, -1.5481e-03, -8.9440e-04, -2.8009e-04, -2.9441e-04,
         1.1852e-03,  1.3544e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2267e-01,  7.3826e+01, -3.5716e-02,  5.9397e-01,  4.2049e-02,
         6.8377e-02, -1.3806e-01,  3.1257e-01, -2.7623e-01,  5.0367e-02,
        -1.4703e-02,  9.9985e-02, -3.2439e-02, -6.1441e-02, -1.2573e-01,
        -2.7461e-01, -8.5630e-02,  1.3712e-01,  6.0578e-02,  2.6263e-01,
        -4.1467e-01, -4.8492e-04,  1.4055e-01, -3.3008e-02, -9.5946e-02,
        -6.2781e-02, -2.6605e-01,  4.0193e-01,  3.0287e-02,  1.6113e-01,
        -1.1415e-01, -2.8488e-02, -2.8771e-02, -5.1026e-02,  3.3213e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9674e+00,  1.1589e+02, -1.7894e+00, -3.4520e-01, -8.6845e-01,
         3.2000e-02, -4.9323e-01, -3.5258e-01, -6.5427e-01, -7.3854e-01,
        -4.4123e-01, -6.8575e-01, -3.0699e-02, -2.7584e-03,  4.6233e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2982e-01,  1.1969e+01,  1.3389e-01, -1.5824e-02, -4.5306e-03,
         8.6130e-03,  3.9288e-03,  7.2531e-03, -1.6219e-02,  6.1824e-03,
         1.6290e-03, -1.3615e-02, -5.7216e-03, -5.9441e-03,  6.5548e-03,
        -7.7911e-03,  6.2265e-04, -2.1001e-02,  1.6118e-02,  1.6053e-02,
         5.2659e-03, -2.6166e-03,  8.0693e-04, -6.8067e-03,  1.4765e-02,
        -1.7315e-02,  5.4147e-03,  3.0222e-03, -7.3820e-05,  5.2910e-03,
         2.4676e-03,  6.1568e-04, -1.6850e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3337e+00, -2.9064e+02,  5.4267e-01,  9.2788e-01,  3.9238e-01,
         7.9608e-01,  4.5808e-01,  2.1042e-01, -1.1706e+00, -2.7419e-01,
         4.0856e-01, -1.1272e+00,  4.1569e-01,  4.0430e-01, -6.5810e-02,
         6.2953e-02, -6.7926e-02,  3.9901e-01, -5.6210e-01, -9.9966e-02,
        -1.1367e-01,  3.2865e-01,  5.0509e-01, -9.9421e-02,  1.4873e-01,
         3.1735e-01,  5.7207e-01, -6.9690e-01,  1.1741e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.7306e-03,  1.2487e-01, -7.7878e-04,  4.8760e-04, -4.8196e-04,
         3.5229e-04,  4.2597e-04, -4.6811e-04,  1.2049e-04, -1.2584e-04,
        -4.1517e-05,  1.5451e-04,  6.5616e-05,  4.0584e-05, -6.8496e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4977e-04,  5.8925e+00,  8.7470e-02,  1.0741e-02,  8.9962e-03,
        -1.0116e-02, -2.3601e-02, -6.5925e-02, -1.2936e-02,  4.2494e-02,
        -2.8919e-02, -1.5377e-02, -5.4761e-02,  7.2393e-04, -2.2458e-03,
         4.9923e-03,  1.3225e-02,  3.7871e-02,  4.7889e-02, -2.7319e-02,
        -7.5382e-03,  1.0890e-02,  9.4973e-03, -5.7966e-03, -1.1916e-03,
        -3.5015e-03,  3.3670e-02,  2.4464e-02,  3.0811e-02,  3.2597e-02,
        -1.1305e-02, -3.3173e-03,  7.0540e-03,  8.7574e-03,  1.3244e-03,
         2.4364e-02,  2.0192e-02,  2.1068e-03,  1.2410e-02,  6.0607e-03,
        -1.5323e-03, -1.0397e-02,  1.3806e-03, -1.0009e-02,  2.7623e-03,
         2.1549e-02,  3.9911e-02,  4.2984e-02, -2.6919e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8734e-01, -3.5661e+01,  8.7772e-03,  1.1644e-02,  1.0417e-01,
        -1.3503e-01,  6.8576e-02,  4.2609e-02,  1.5194e-01, -1.2107e-01,
         7.9652e-02,  1.0172e-01,  1.5154e-01,  8.2967e-02,  5.2299e-02,
        -1.4077e-03, -2.1172e-01, -2.2552e-02, -7.2607e-02, -2.5647e-01,
         6.3442e-02,  8.0428e-02,  6.7535e-02, -9.1926e-02,  4.1633e-02,
         1.7819e-01,  1.3510e-01, -1.1019e-01,  3.9445e-02,  6.7642e-02,
        -1.1717e-01, -4.6883e-02, -2.3318e-01,  4.0280e-02, -1.1190e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5378e-04,  2.8330e-01, -2.7945e-03,  3.3528e-04,  1.7516e-03,
        -3.8512e-04, -9.1675e-05,  3.4047e-04,  1.4132e-03, -3.7196e-04,
        -3.1174e-04, -7.4191e-05, -2.3498e-04, -4.2648e-04,  1.0582e-03,
         4.8002e-04, -4.4682e-04, -1.1974e-03, -6.5631e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5638e-02,  1.8066e+00, -1.4163e-02, -1.3191e-02, -2.7636e-03,
         9.2615e-03, -4.4534e-03, -4.2633e-03,  3.3879e-03,  3.7249e-04,
         1.0224e-02,  7.4397e-03, -8.3995e-03, -5.3317e-03, -3.5860e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6699e-04,  3.1712e-01, -4.4394e-03,  3.9154e-04, -1.1549e-03,
        -2.7419e-04, -1.0444e-03, -6.5358e-04, -1.2209e-05,  5.4500e-04,
        -1.9181e-03,  2.1913e-04,  8.1041e-05, -1.3803e-03,  3.8295e-04,
        -1.2872e-03, -9.7304e-05, -1.8425e-03,  7.4083e-05,  1.3451e-03,
         1.0418e-03, -3.1105e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7023e-03,  2.2977e-01, -3.2549e-03,  2.9547e-04, -1.3839e-03,
        -5.4425e-04, -2.9587e-03, -1.7994e-03, -1.3460e-03,  1.0958e-03,
        -6.4963e-04, -2.2274e-03, -2.2640e-04, -7.3166e-04, -1.1391e-04,
         8.7256e-04, -2.5371e-04, -1.6861e-03,  1.2208e-03, -6.2069e-04,
         6.9900e-04,  7.8567e-04,  1.0851e-04, -6.3225e-05, -1.5763e-03,
         3.1956e-04, -1.4726e-04, -2.0951e-03,  1.3050e-03, -4.7871e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0007,  0.1076, -0.0016, -0.0005,  0.0003, -0.0003, -0.0010,  0.0002,
         0.0015,  0.0001, -0.0029,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6791e-02,  9.3078e-01,  4.8841e-03, -4.4989e-03, -1.7656e-03,
        -1.6374e-03,  3.9766e-03, -2.7681e-03,  1.2744e-03, -1.7618e-03,
        -3.3833e-03,  1.6568e-03, -1.2525e-03, -2.1515e-03,  1.4265e-04,
         3.7605e-03, -1.4756e-03,  3.6809e-04, -1.1986e-03, -2.4964e-03,
        -2.7542e-03, -1.1956e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2907e-03,  4.3154e-01,  4.7907e-03, -1.3369e-03, -6.1573e-04,
         1.0526e-03, -4.1542e-04,  5.1647e-05, -9.6577e-04, -7.3811e-04,
        -5.8717e-04, -1.2346e-03,  1.1833e-04, -4.4433e-04, -4.3343e-04,
        -1.4420e-03, -1.8038e-04, -8.5537e-04, -8.6010e-04,  6.4339e-05,
        -9.2237e-04, -8.9303e-04, -2.6352e-04, -1.8219e-04,  2.5598e-03,
        -2.9031e-04, -1.6329e-03, -1.9283e-04, -1.4297e-03, -1.6844e-03,
         8.9598e-04, -8.5555e-04,  7.3438e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4461e-03,  6.8044e-01, -3.6654e-04, -7.9859e-03, -2.8548e-03,
        -6.2152e-04, -1.2593e-03, -4.8303e-03, -1.1627e-03, -1.3904e-03,
        -4.6021e-03, -1.0303e-03, -1.7874e-04,  8.8377e-04, -7.3988e-04,
        -1.3343e-03,  2.6987e-03,  1.3246e-03, -5.6177e-04, -1.3336e-04,
        -8.1233e-04, -5.0870e-03,  1.0161e-03,  1.6619e-04,  2.0678e-03,
         3.0083e-03,  9.9470e-04, -2.5414e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4258e-02,  2.1921e+00,  4.8616e-02, -3.5108e-03, -1.3864e-03,
        -9.1048e-03,  3.4125e-02,  8.9419e-03, -5.2645e-03,  6.9874e-03,
        -2.2933e-05, -5.4570e-03, -4.9464e-03,  1.7542e-03, -3.6135e-03,
        -2.3747e-03, -4.8704e-03, -4.1623e-03, -7.1848e-03, -1.3472e-02,
        -4.1349e-03,  5.7658e-03, -2.2519e-03,  1.6207e-02, -1.0349e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.6113e-02,  9.9045e+00,  6.6717e-02, -3.3214e-02, -3.2482e-02,
         1.8686e-02, -1.3261e-02, -1.5534e-02,  1.2802e-02,  3.7070e-03,
         2.3262e-02,  2.8104e-02,  1.1001e-03,  3.8212e-02, -1.5985e-02,
        -3.9209e-02, -2.7679e-02, -1.1138e-02,  3.6741e-03,  1.1712e-02,
        -1.4653e-02,  6.6316e-03,  5.3939e-03,  1.0703e-02,  4.9553e-02,
         8.5469e-03, -1.3031e-02, -8.3926e-03, -1.2605e-02,  9.1812e-03,
        -8.1752e-03, -5.1809e-03, -1.1385e-02, -5.1578e-03, -1.3912e-02,
        -8.6875e-03, -1.5262e-02, -2.1354e-02,  1.7150e-02, -3.4513e-02,
         2.9233e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6292e-03,  3.5108e-01, -4.4230e-04, -1.0374e-03, -1.4019e-04,
         7.8549e-04,  1.1185e-03, -3.0027e-04,  4.4611e-04,  7.6189e-04,
         5.2932e-04, -1.5754e-03, -6.5809e-04,  5.3912e-04,  1.8482e-03,
        -1.9815e-04,  4.2061e-04,  1.3569e-03,  4.0655e-04, -2.2240e-03,
        -4.8158e-04, -2.8031e-04,  1.5175e-05,  3.8084e-04, -1.5252e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8971e-02,  2.0433e+00,  1.2543e-02, -5.0562e-03,  8.2658e-04,
        -4.3865e-03, -3.2841e-03, -2.4224e-03, -7.4577e-03,  4.1771e-03,
        -4.6320e-03,  1.6625e-03,  2.9353e-03,  2.3019e-03, -7.6265e-04,
        -3.2069e-03, -5.5278e-03, -2.5177e-04,  3.1171e-03,  1.6341e-02,
         1.0303e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1061e-02,  5.2269e+00, -7.0967e-02, -1.0624e-02, -1.5898e-02,
        -9.7073e-03, -2.2052e-02, -2.2150e-02, -1.4100e-02, -1.8511e-02,
        -3.3309e-03, -4.2253e-02, -8.3008e-03, -6.3914e-03, -1.3845e-02,
        -2.0272e-02, -3.3551e-02, -1.4601e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0651e-02,  1.6533e+01,  1.2515e-01,  3.7604e-02, -5.0142e-02,
         6.1363e-04,  8.0291e-02, -7.0607e-02, -1.0977e-01,  1.7326e-02,
        -2.2779e-02,  5.0390e-03, -2.4303e-02,  4.7005e-02, -2.1664e-02,
        -3.6850e-02, -8.3523e-02, -4.7972e-02, -1.8092e-02, -2.9939e-02,
        -2.4385e-02, -1.1187e-02, -2.2625e-02,  2.9850e-03, -1.7791e-02,
        -6.9620e-03, -5.0762e-02, -2.5454e-02, -1.3673e-02, -6.4307e-03,
        -2.1106e-02,  8.9597e-03, -4.1397e-02, -2.7624e-02,  2.6918e-02,
        -4.4150e-02, -4.7013e-02, -1.1818e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0547e+00,  3.1796e+02, -1.0911e+00,  3.9845e-01,  4.0935e-01,
        -3.9546e-01, -1.1704e-01,  1.0957e+00, -4.6388e-01,  5.9742e-01,
         1.2479e+00, -2.2109e+00, -2.9997e-01,  5.3478e-01, -5.1888e-01,
         3.1323e-01,  5.8206e-01,  3.9579e-01, -1.0794e-01,  1.8335e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0318e-01,  7.3574e+01,  1.6671e-01,  2.0285e-01, -1.7940e-01,
         2.4871e-01,  1.8990e-01,  2.8270e-01,  2.0470e-01, -4.2629e-02,
        -5.2064e-02, -2.3936e-01,  8.9174e-02,  1.6774e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4466e-04,  2.3272e-01,  9.7337e-04, -1.4098e-04, -1.4065e-03,
        -9.4201e-04, -1.7972e-03, -4.6176e-04, -2.4073e-04, -9.9545e-04,
        -8.0456e-04, -9.9771e-05, -5.4391e-04,  3.8398e-05,  6.4784e-04,
        -3.0456e-04, -1.5471e-03, -2.7857e-04, -1.1076e-04,  3.2367e-04,
        -1.0202e-03, -1.1133e-03,  2.3784e-04, -2.8978e-04, -5.8797e-04,
        -1.3335e-03, -6.3798e-04, -1.0416e-03, -5.9030e-04,  1.5491e-04,
        -1.8268e-04,  2.4120e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6608e-01,  5.9198e+01,  7.3481e-02, -2.2921e-01, -5.5669e-01,
        -3.6468e-01,  3.4417e-01, -1.3495e-01, -7.1192e-03,  1.0038e-01,
        -6.6689e-03,  3.7001e-01, -4.8553e-02,  8.3645e-02, -7.7547e-02,
         5.9670e-03, -2.9016e-02, -7.1737e-01, -3.2306e-01, -1.7659e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0198e-01,  7.0911e+01,  3.4251e-02, -5.7615e-01,  6.2301e-02,
        -8.5255e-02, -1.6738e-01, -1.0768e-01, -1.9112e-02,  8.1840e-02,
        -4.2020e-02, -3.0758e-02,  1.0817e-02,  1.1244e-02,  4.7582e-02,
        -6.5373e-02,  4.5881e-02, -1.6118e-01,  3.4485e-02,  3.0354e-02,
        -3.9438e-02,  2.8399e-02, -2.5860e-02,  1.1202e-02,  1.4469e-01,
        -2.0320e-01,  1.2365e-01, -2.0711e-02,  1.5631e-02, -1.3883e-01,
         6.3434e-02,  1.7081e-02, -1.2559e-01,  2.4151e-01, -3.4120e-02,
         6.9263e-03, -6.9513e-03,  6.5085e-02,  4.2768e-02, -7.5499e-02,
        -1.5779e-01, -2.9737e-02, -1.2596e-01, -4.3421e-02,  4.6238e-02,
         9.3887e-02, -2.2762e-01,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5858e+00,  1.6561e+02,  5.4335e-01, -5.8544e-01, -4.3072e-01,
         1.6950e-01,  1.3851e-01, -5.0982e-01,  6.7433e-01,  3.3252e-01,
         4.8278e-01, -7.3403e-02, -1.6451e-01,  5.2542e-01,  4.2186e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7882e-04,  2.3868e-01, -1.1047e-03, -4.8196e-04, -3.7701e-04,
         1.8837e-05,  1.6294e-04,  5.3676e-04,  1.4655e-04, -2.1889e-04,
         1.2447e-04,  1.0338e-04, -1.8886e-04,  8.4452e-04, -1.1042e-03,
        -3.5173e-04,  2.3462e-04, -8.1970e-04, -2.7958e-04, -3.6155e-04,
        -2.8017e-04, -4.7063e-04, -2.0306e-04,  5.7453e-04, -1.9267e-04,
         1.2297e-04,  9.9185e-04, -1.2315e-03,  8.2146e-04,  3.1975e-04,
        -2.1029e-05, -8.0741e-04,  3.4788e-05, -1.2549e-04,  2.2671e-04,
        -1.9189e-05, -1.0135e-04,  1.5975e-04, -7.5751e-05, -1.5942e-05,
        -3.4645e-04,  1.1302e-04, -4.2815e-06,  1.8638e-05,  4.1967e-04,
         2.2982e-04,  2.6891e-04, -1.1408e-03], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-6.6759e-03,  9.5851e-01,  1.0232e-02, -9.2451e-04,  5.0762e-03,
         4.9511e-03,  4.1986e-03,  1.3207e-03,  9.2652e-04, -3.7737e-03,
         3.6753e-04,  3.0674e-03,  3.1295e-04,  1.0621e-03, -7.9936e-04,
        -4.6970e-03, -3.4863e-03, -4.1077e-03, -9.1300e-04,  6.7027e-04,
        -1.5898e-03, -4.2492e-03, -3.0575e-03,  4.4216e-03,  1.1014e-03,
        -2.3087e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0618e-03,  4.9415e-01, -1.1866e-03, -4.8956e-04,  7.6793e-04,
         1.2355e-04, -1.0651e-03, -9.1530e-04, -1.1097e-04,  7.9841e-05,
        -2.3951e-03, -5.8362e-04, -3.5479e-04, -5.5032e-06, -1.4893e-05,
         1.4228e-04,  3.0694e-04, -2.9207e-04,  8.8123e-04, -9.4847e-04,
        -2.1435e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5829e-03,  1.9274e-01, -9.8831e-04, -1.7223e-03,  1.2250e-04,
        -6.8980e-04, -3.3451e-06,  7.7665e-04, -1.3738e-04, -3.6345e-04,
        -3.9552e-04,  1.4665e-04, -1.7581e-04, -1.3745e-04,  1.1421e-04,
        -2.4583e-05, -4.8321e-04, -1.9756e-04, -1.4819e-05,  5.3867e-04,
        -3.2731e-04, -4.9688e-04,  4.3628e-04,  1.7342e-04, -8.3750e-05,
        -5.8657e-04, -1.0263e-04,  1.1457e-04,  8.0153e-05,  6.7563e-05,
        -1.4479e-04, -3.8619e-04,  4.4080e-04,  2.1756e-04, -7.4812e-04,
         6.9336e-04,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1734e-01,  8.0694e+00,  2.7079e-02, -6.8923e-03, -2.8011e-02,
        -4.3856e-02, -9.2261e-03,  6.5263e-02,  3.1387e-03, -1.4477e-02,
         3.2421e-02,  2.8671e-03, -2.8381e-03, -5.4998e-02,  9.0455e-03,
         1.2814e-02,  3.1722e-02, -4.1427e-02,  2.1685e-02, -6.2015e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0259e-03,  1.1017e+00,  1.4098e-02,  1.3732e-03, -1.2197e-02,
        -3.4005e-03, -1.8261e-03,  8.0415e-03,  5.0385e-03, -1.3447e-02,
         5.0144e-03, -3.1375e-03, -1.9182e-03,  8.9353e-03,  5.2677e-03,
        -2.7241e-03,  2.3872e-03, -1.8689e-03,  1.1647e-03, -2.5382e-02,
        -2.9259e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6918e-02,  6.6960e+00,  3.1322e-02, -9.2331e-03,  5.2644e-04,
         1.1416e-02,  2.8020e-03, -2.0206e-02,  3.9806e-02,  1.1378e-02,
         5.0214e-03, -2.5668e-02,  4.7728e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5360e-04,  1.0727e+00,  2.0529e-02, -2.1608e-03,  7.7143e-04,
        -1.5912e-03, -5.5060e-04, -6.5921e-04, -2.0814e-03,  1.5804e-03,
        -2.2792e-03, -6.6576e-04, -1.0574e-03,  8.9505e-03,  2.6296e-04,
         1.7771e-03,  1.1359e-02, -2.3836e-03, -1.9949e-04, -4.0521e-04,
        -9.5651e-04, -2.3997e-03, -8.3492e-04, -1.2339e-03,  1.6549e-03,
        -1.7130e-03, -9.4006e-04,  7.8662e-03,  2.2094e-05,  6.2922e-04,
        -1.5429e-03, -2.3854e-03,  8.4161e-03,  6.3144e-04,  5.1083e-04,
        -6.2496e-03,  7.8048e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3013e-03,  1.3271e+00, -3.2878e-03, -2.7743e-03,  2.9530e-03,
         3.2069e-03,  8.5941e-04, -9.1921e-03,  2.8801e-03,  2.7127e-03,
         1.9423e-03,  4.1488e-03,  3.2530e-03,  1.2414e-03, -1.7476e-03,
         3.0867e-04,  1.4246e-03,  2.4688e-03,  2.5296e-03, -2.2769e-03,
         4.0435e-03,  2.4207e-03,  1.2390e-04, -3.2049e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5531e-01, -5.7209e+01, -7.9855e-01, -1.8431e-01, -3.2301e-02,
         3.2154e-02,  8.4988e-02, -1.0040e-01,  1.4099e-01,  2.4317e-01,
         1.6390e-01, -3.3693e-01, -5.4394e-01, -3.4684e-01, -3.9979e-02,
         3.0937e-01, -8.1202e-02,  6.1122e-02,  8.2717e-03,  6.1365e-02,
        -3.4165e-02,  7.7359e-02, -2.5036e-02,  1.5574e-02, -1.1205e-01,
         3.3686e-02,  1.2099e-01,  2.0516e-01,  1.0555e-01, -1.6638e-01,
        -1.0332e-02, -4.4120e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8206e-04,  5.3828e-01,  2.5420e-03, -7.3216e-04, -2.3850e-03,
        -2.4680e-03, -2.7229e-03, -1.8265e-03,  8.5922e-04, -2.8020e-04,
         6.8871e-04, -5.7268e-04, -9.2440e-04, -6.9698e-04, -1.0531e-03,
        -1.2811e-03,  1.3841e-04, -5.7071e-04, -4.3322e-04,  5.4327e-04,
        -1.9954e-03,  1.1303e-04, -1.2385e-03, -9.7192e-04,  6.5787e-05,
        -7.2566e-04, -2.2574e-04, -1.8089e-04, -1.5132e-04, -1.4446e-04,
        -5.7650e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9696e-01,  1.8900e+01,  2.9704e-01,  3.8066e-02,  1.0575e-01,
         4.8468e-02,  9.6961e-02, -1.1233e-01,  5.7853e-02,  7.0692e-02,
         3.2777e-02,  1.3728e-01,  6.9201e-03, -6.2485e-03,  5.5600e-02,
         3.9735e-02,  3.7153e-02,  1.1480e-01,  5.4469e-02,  2.1290e-02,
        -1.1463e-02, -3.8880e-02, -5.3132e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7397e+00,  1.1468e+02, -9.1669e-01, -5.2675e-01, -3.3437e-01,
        -2.2415e-01, -2.6790e-01, -4.5843e-02, -3.8567e-02, -7.2438e-02,
        -4.3055e-02, -1.8808e-01, -6.8268e-02,  5.7545e-02, -7.0055e-02,
        -1.6626e-01, -1.0374e-01, -1.4723e-01,  1.3309e-01,  2.0557e-02,
        -2.2032e-01, -1.2885e-01,  1.1795e-01,  2.3413e-01,  6.2933e-02,
         5.2345e-02, -3.2862e-01,  4.8335e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.5784e-05,  1.4985e-01, -8.8173e-04, -1.3897e-06, -2.5433e-04,
        -3.0856e-04, -9.4698e-05,  4.3513e-04, -3.8322e-04, -6.3519e-06,
         6.5178e-05, -2.9414e-05,  3.2941e-04,  5.4776e-05, -3.2669e-04,
         4.1317e-04, -9.0957e-05, -8.6634e-04,  5.5234e-06,  4.8954e-05,
        -4.0529e-05, -1.6036e-04, -6.7075e-05, -2.6503e-04, -1.1377e-04,
        -3.1270e-04, -1.0421e-04, -1.0489e-04, -1.5133e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2240e-02,  2.0744e+01, -9.5114e-02,  2.8808e-02,  3.3874e-02,
        -2.0605e-02, -8.0628e-03, -5.3048e-02,  5.8291e-02,  1.9521e-02,
         4.6232e-02, -6.6748e-02, -3.0420e-02,  8.7665e-02,  1.1382e-03,
        -1.0865e-02,  6.8058e-02, -3.4183e-02,  1.6816e-01, -2.4518e-02,
        -2.2814e-03, -3.4590e-02,  2.2326e-02,  6.2543e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5522e-01,  3.2413e+01,  4.6468e-01,  2.4459e-02, -5.8610e-03,
        -2.5799e-02, -3.1053e-03, -2.5475e-02, -1.3763e-01,  3.2130e-02,
         1.7102e-02, -2.5666e-02,  1.2678e-01,  1.4744e-01, -1.2962e-02,
         4.6408e-02, -3.0408e-02, -2.0063e-02,  2.9515e-02, -3.3298e-02,
        -1.0103e-01, -3.6924e-04,  5.4905e-02,  6.6505e-02,  2.2297e-02,
        -1.8247e-02, -3.2183e-04, -1.0263e-04,  4.7929e-02,  4.6691e-02,
         1.0075e-02, -1.0124e-02, -1.6546e-01, -1.1011e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3245e+00, -2.5277e+02,  1.2550e+00, -8.0726e-02,  8.4816e-01,
         1.1581e-01, -9.1456e-01, -1.6528e-01,  6.1438e-02, -1.3808e+00,
        -6.6808e-01, -1.0068e+00, -5.2185e-01, -8.1484e-02, -9.9595e-01,
         1.3076e-01,  8.7280e-01, -4.4490e-02, -4.6708e-01, -1.3354e+00,
        -3.7322e-01, -6.7251e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9640e-04,  4.4237e-01, -4.0060e-03, -2.1355e-04, -1.9429e-03,
        -1.5849e-03,  1.1958e-03,  2.7033e-03,  1.7360e-04,  5.3711e-05,
         9.9040e-04, -3.7609e-04,  1.0942e-03,  2.7751e-04, -7.0636e-04,
        -7.7474e-05, -1.1963e-03, -6.4113e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9467e-03,  2.1597e+00, -3.4798e-02, -1.9196e-03, -2.5404e-03,
        -1.0462e-03,  1.5582e-03, -4.5203e-04, -1.0838e-02,  6.2499e-03,
         2.3538e-04, -1.0979e-02, -2.6757e-03,  4.2524e-03, -4.1533e-04,
        -3.2500e-05,  1.3540e-03, -6.8519e-03,  2.2290e-03, -7.7307e-04,
         1.2339e-03,  4.1412e-03,  1.0434e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1281e+00, -3.0921e+02,  1.6449e+00, -1.8559e+00,  3.9951e-01,
        -5.8419e-01, -7.1938e-01,  3.0375e-01,  4.6922e-01,  1.7293e+00,
        -7.8866e-01, -1.2538e-01,  1.4664e-01,  4.7470e-01, -7.0766e-02,
        -1.5235e-01,  4.0155e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4421e-01,  2.2556e+01,  3.7026e-01, -7.8932e-02, -1.1390e-01,
         1.1310e-01,  1.3956e-01, -3.2692e-02,  8.5586e-02,  1.9920e-02,
         2.3417e-02, -4.2096e-02, -7.4326e-02, -1.6900e-02,  1.6165e-01,
        -1.5314e-02,  8.1385e-02,  2.2324e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0816e-02,  1.0221e+02,  1.1612e-01, -2.4236e-01,  3.7940e-01,
        -1.4537e-01,  1.2081e-01, -5.8215e-01, -1.6324e-01,  2.8479e+00,
        -2.2033e-01, -6.8106e-02, -7.8988e-01,  1.9640e-02,  4.3141e-01,
         4.8336e-01, -2.1774e-01,  6.5929e-01,  3.3548e-02,  3.2671e-01,
         1.5753e-01,  1.8669e-01,  3.3052e-01,  2.9066e-01,  1.7001e-01,
         1.1789e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5940e-03,  1.0176e-01,  1.2281e-03, -2.5299e-04, -7.0444e-04,
        -4.0259e-04, -1.0879e-03,  3.0798e-04,  1.6461e-04, -2.8879e-04,
        -1.5444e-04,  2.9328e-04, -3.5775e-04, -6.6919e-04, -5.9357e-04,
         1.6534e-04, -1.0899e-04,  1.5295e-04, -1.0721e-05,  1.2145e-05,
        -6.3258e-05, -8.7379e-05, -7.2358e-05,  1.8714e-05, -1.8468e-04,
         1.3647e-04, -3.2086e-06,  1.6948e-06, -1.7851e-04,  6.2697e-04,
        -1.2562e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1091e-03,  1.4958e+00,  4.8381e-03,  4.9468e-03, -7.9739e-03,
        -8.1455e-03, -1.0629e-03, -6.3080e-03, -2.6636e-02, -3.9710e-03,
        -5.5768e-03, -1.6568e-03,  2.3738e-03, -1.7842e-03, -9.9215e-03,
        -4.1996e-04, -1.0457e-02, -4.9239e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0044,  0.2732, -0.0005,  0.0031, -0.0014, -0.0004, -0.0021, -0.0019,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.7030e-03,  5.5040e-01,  1.7821e-03, -2.3218e-03,  3.0318e-04,
        -2.8455e-04, -1.9921e-03, -1.3939e-03,  1.8383e-03, -4.2789e-04,
         4.7628e-04,  5.4874e-04, -4.3243e-04, -9.1998e-04, -1.5198e-04,
         2.8601e-04,  4.9680e-04, -1.3734e-03,  2.8160e-04, -3.6201e-04,
         9.0481e-04,  4.8457e-04, -2.1797e-03,  4.2251e-05,  5.5071e-04,
        -5.3398e-04,  2.5782e-04,  1.3249e-03, -1.3173e-03,  1.1111e-03,
        -1.5552e-03, -1.0227e-03,  1.5973e-04,  4.3618e-04, -1.2443e-05,
        -1.7318e-04,  2.9682e-04,  1.0113e-03,  1.5509e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8477e-04,  2.2357e-01, -1.7543e-04, -1.3513e-04, -3.5662e-04,
         9.0378e-05, -3.6241e-04,  1.2911e-04,  1.4404e-04, -3.7013e-04,
        -1.8368e-04,  1.0896e-04,  4.1381e-05, -1.7147e-04,  2.9533e-04,
        -1.3709e-03, -1.3511e-04,  1.6584e-05,  8.2077e-05,  5.1059e-05,
         3.8086e-04,  2.5683e-04, -1.1959e-04,  1.0316e-04, -2.2230e-04,
         2.7216e-04, -9.5544e-04, -5.2034e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5448e+00,  1.4641e+02,  4.3843e-01,  1.5123e+00,  4.9062e-01,
         4.2166e-01,  8.9404e-02,  1.9566e-01,  5.0134e-01,  7.0726e-01,
         2.5903e-01,  4.9042e-01,  1.7918e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3090e-03, -2.1812e+02,  2.0106e+00, -1.8958e-01,  7.1621e-01,
         2.5544e-01,  7.0796e-01,  4.4087e-01,  7.7896e-01, -9.8747e-02,
        -4.7896e-02,  1.0386e+00,  1.1092e+00,  9.9265e-01, -1.9981e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1709e-02,  2.9193e+00,  2.5725e-02, -1.0592e-02,  1.6584e-02,
         1.4001e-03, -1.3397e-03, -2.4565e-04,  2.1837e-03, -1.4616e-03,
        -4.5946e-03, -1.8966e-03,  3.7473e-03,  2.4350e-03,  6.6773e-04,
        -4.3319e-03,  9.8969e-04, -1.4104e-03,  2.8626e-03, -3.4816e-03,
         3.1154e-03,  9.2804e-04,  1.8904e-02, -1.5495e-03,  4.5104e-03,
         2.2856e-03, -1.9784e-03,  7.8127e-03, -2.2280e-03,  3.5203e-04,
        -2.4372e-03,  1.1226e-03, -7.1548e-03, -1.8157e-04,  2.2745e-03,
         1.7733e-03, -6.9231e-04, -9.1681e-04,  3.6653e-03,  1.0645e-03,
         1.6802e-02, -3.3814e-04,  8.9018e-04, -2.3138e-03, -2.8432e-03,
         9.6847e-03, -2.9091e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2759e-02,  5.9276e+00,  5.2300e-03,  1.6456e-02,  1.5065e-02,
         7.5867e-03, -5.2943e-03, -1.2140e-02,  2.6766e-02,  3.5106e-03,
        -8.5845e-03,  3.8932e-04, -1.5717e-02, -1.2823e-02,  5.1528e-03,
         9.4496e-03,  2.4447e-02, -5.7729e-03, -3.7633e-03,  9.2372e-03,
         5.5106e-03,  5.9202e-03,  5.6827e-03,  1.0866e-03, -5.9575e-03,
        -2.4978e-03,  1.9749e-02,  5.9435e-04, -2.1259e-03, -4.7304e-03,
        -1.1651e-02, -2.5270e-03,  1.2756e-02, -1.2561e-02, -5.2537e-04,
        -5.8013e-03,  2.7697e-03, -8.6557e-03, -6.7915e-03,  2.9071e-03,
        -1.4599e-02,  5.6837e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8257e-02,  4.6584e+00,  4.4340e-03, -2.9444e-02, -1.5275e-02,
         1.4327e-03,  1.9501e-02, -1.4998e-02, -1.1791e-03, -1.8693e-02,
        -3.8817e-02, -9.8704e-03, -1.2453e-02,  4.0367e-03,  1.5760e-02,
        -1.7938e-02,  1.6797e-02, -1.2511e-02, -2.8456e-03, -7.4923e-03,
        -7.4596e-03, -4.9825e-03, -1.6717e-02,  2.1903e-03,  1.0010e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4892e+00,  2.0591e+02, -1.7825e-01, -1.9456e-01, -8.0798e-01,
        -3.1433e-01,  6.6542e-01, -7.7973e-02,  1.4613e-01,  3.5493e-01,
         1.6983e-01, -3.4175e-01,  2.8095e-01, -1.8496e-01,  1.1182e-01,
         4.9354e-01, -4.9843e-03,  1.0753e+00,  7.4446e-02, -7.9404e-02,
        -2.1478e-01,  1.8318e-01, -7.4514e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0394e-02,  2.6627e+00,  4.5111e-02,  7.1857e-03,  1.6619e-03,
        -1.0968e-02, -6.9462e-03, -6.2578e-03,  6.3972e-04, -4.3659e-03,
        -2.3493e-04, -2.6019e-03,  1.1109e-02,  3.0881e-03, -3.1989e-03,
        -2.2696e-03, -7.9949e-04,  4.0265e-03,  5.8260e-03,  4.6041e-03,
        -5.3067e-03,  3.2437e-03, -7.1973e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6374e-02,  2.3361e+00,  1.5483e-02,  3.7017e-03, -1.9630e-02,
         9.5295e-03,  4.0100e-03, -5.2552e-03,  1.1195e-02, -4.9504e-03,
        -3.0569e-03, -2.4893e-02, -2.0618e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4731e-02,  4.8259e+00, -2.0919e-02,  1.8638e-02, -1.9614e-02,
        -3.9533e-03,  1.0670e-02,  1.0854e-02,  4.0248e-02,  1.1352e-02,
        -6.6024e-03, -9.2824e-03,  1.7871e-02, -1.5199e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4448e-02,  4.7671e+00, -7.1322e-03,  9.6717e-03, -1.5740e-02,
        -2.8080e-03,  2.2496e-03, -4.8738e-03,  1.7185e-02,  1.0042e-02,
         4.8737e-04,  1.3913e-02,  7.7826e-05,  3.2736e-02,  1.3988e-03,
         5.8021e-03,  6.1441e-03,  1.1574e-02, -2.8839e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.1262e-02,  2.9876e+01,  4.6652e-02, -2.3224e-01, -1.7393e-01,
        -2.0542e-02, -2.4246e-02,  3.0483e-02,  1.4393e-02, -1.5401e-01,
         4.1125e-02, -5.8870e-02,  1.3140e-02,  2.0383e-02, -4.0175e-02,
        -9.6657e-02,  6.3412e-02,  2.8702e-02,  1.2946e-02, -3.3470e-02,
         9.4903e-03,  2.5584e-02, -9.1657e-02,  7.7665e-02,  2.0675e-02,
         5.3686e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8314e-03,  9.8226e-02,  1.7040e-03, -1.8295e-04,  4.4179e-05,
        -3.0080e-04, -4.1884e-05, -6.4256e-04,  7.8205e-05,  3.3109e-04,
         2.8523e-04, -2.4882e-04,  3.5435e-04, -1.8985e-04, -3.3292e-04,
        -5.4487e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0739e-02,  2.5737e-01, -3.9784e-04,  2.6597e-03,  5.8665e-04,
        -4.1098e-04,  1.1445e-03, -1.7350e-03,  1.7435e-04,  1.9650e-03,
        -1.0469e-05, -1.4577e-03,  3.1258e-05,  1.9239e-03,  2.0096e-03,
         3.8774e-04, -2.6733e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0207e-01,  5.6278e+00,  5.6971e-02, -3.6529e-03,  5.1030e-02,
         3.6813e-02, -9.1832e-03, -3.4620e-02,  1.6252e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8373e-02,  1.9661e+00, -8.1500e-03, -6.8280e-03, -1.5603e-02,
        -2.1443e-03,  3.4399e-03,  3.3659e-03, -2.5589e-04, -1.5385e-04,
         7.0380e-04,  2.1444e-02, -1.5678e-03, -3.4029e-03,  4.6990e-04,
         2.8039e-04,  7.4215e-03,  3.0295e-03, -2.3353e-03, -2.1023e-04,
        -1.3844e-03,  2.9019e-03,  4.1411e-03, -3.5987e-03, -4.0706e-04,
         3.5469e-04,  5.6968e-04, -9.6687e-04,  1.9613e-03,  9.0088e-04,
         2.6146e-03, -5.8972e-04, -3.9842e-04, -8.1100e-04,  2.2355e-05,
        -2.1537e-03, -2.8017e-03,  1.2952e-03, -3.1295e-03, -4.0239e-03,
        -1.8547e-03, -2.6162e-03, -5.3674e-03, -1.5925e-03, -2.7972e-03,
        -5.2981e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1244e-03,  2.5753e-01,  2.1400e-03, -5.6051e-05, -1.2759e-04,
        -6.8566e-04,  3.1944e-05, -6.3213e-05, -1.4163e-04, -1.4658e-04,
         1.2294e-04,  4.0949e-05, -6.6712e-04, -2.5517e-04,  1.0817e-04,
         1.2405e-04,  7.2143e-04,  6.9483e-04, -3.1965e-05,  7.4481e-04,
         1.0420e-03,  1.0038e-04,  2.5425e-04, -2.3895e-05,  6.2901e-04,
         1.4131e-04, -1.8986e-04,  1.2588e-04,  9.7255e-04,  6.0167e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9686e-02,  4.3288e+00,  6.5921e-02,  3.9544e-02, -5.6449e-02,
        -1.1062e-02,  4.6365e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3829e-03,  1.0839e+00,  1.5315e-02,  9.1067e-03, -7.2659e-04,
         3.6291e-03, -7.2036e-03,  4.4855e-03,  3.8438e-03, -4.7705e-03,
         1.4771e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3152e-04,  2.2305e-01,  2.2266e-03, -4.3923e-04,  4.7847e-04,
        -1.7352e-03, -3.6641e-04,  1.0897e-05,  6.7372e-04,  5.1389e-05,
        -3.4346e-04,  1.5655e-03, -6.1813e-04, -3.6537e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0335e-02,  7.8110e-01, -2.1690e-04,  2.3497e-04,  1.6640e-04,
         1.8551e-04,  1.7775e-04,  8.6742e-04,  2.5723e-03, -2.4065e-04,
        -2.3136e-03, -1.9628e-04, -1.5172e-03,  5.0809e-04,  6.8818e-05,
         2.8336e-04,  4.5296e-04,  3.4171e-04,  2.4435e-05,  2.1000e-03,
         4.4894e-04, -1.3377e-04, -5.1100e-04, -1.0591e-03, -2.0825e-03,
        -3.5492e-04,  1.0141e-03,  1.1880e-04,  8.4114e-04,  5.9379e-04,
         8.9480e-04, -8.4695e-04,  6.1964e-04,  2.4989e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3891e-03,  1.2468e-01, -7.9544e-04,  7.1972e-05, -7.9345e-05,
         2.2264e-04, -4.5366e-04, -4.4696e-04,  9.5803e-05, -2.1092e-04,
        -4.2110e-04,  3.5697e-04, -1.0642e-04, -3.6800e-04,  1.4442e-03,
         2.9672e-04, -2.0215e-03, -6.2208e-04, -4.4906e-04, -3.2857e-04,
        -3.6334e-04, -2.2833e-04,  6.8219e-04, -1.6695e-04, -1.9782e-04,
         5.9394e-05, -1.1872e-04, -3.6729e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7993e-04,  7.1705e-02,  9.8665e-05,  4.5993e-05, -1.5357e-04,
        -5.9741e-05, -3.8704e-05,  2.3614e-05, -5.6744e-05,  4.2805e-05,
        -5.8593e-05, -1.0697e-04,  1.5265e-04, -7.8083e-05, -6.5090e-04,
        -2.7343e-04,  9.8100e-05,  1.3637e-04, -3.4186e-05, -4.8728e-05,
        -4.4268e-05,  8.7835e-05, -2.4149e-04, -8.9875e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-3.1508e+00,  1.2494e+02, -1.7524e+00, -8.0699e-01, -4.8754e-01,
         1.7645e-01, -7.2931e-01,  2.2031e-02,  1.6564e-02,  1.2146e-01,
        -2.8074e-01, -2.3091e-01,  4.1888e-01,  3.0930e-02,  5.2293e-02,
         2.7012e-01, -3.9086e-01,  1.7354e-01, -4.6958e-01, -2.8462e-01,
         1.0563e-02, -1.3530e-01, -2.2358e-01,  2.3438e-01, -1.5977e-01,
        -5.3859e-01,  8.3892e-02, -4.5332e-01, -3.0018e-01, -3.5848e-01,
        -1.0302e-01,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2947e-03,  3.8429e-01, -4.5619e-03,  2.1901e-03, -5.2965e-04,
         4.5274e-03,  2.4449e-04,  6.8306e-04,  7.4367e-04,  1.3152e-03,
         1.2195e-03, -1.6032e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4488e-03,  3.7044e-01,  7.4418e-04,  2.4819e-03,  2.3685e-03,
        -1.3034e-03, -8.1578e-04, -1.7120e-03, -1.5718e-03,  4.9225e-03,
         8.9650e-04,  1.1563e-03, -9.0011e-05,  1.1827e-03,  3.9637e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0405e-01,  1.1612e+02, -6.9131e-02, -3.4400e-01, -8.4462e-02,
        -2.9857e-01,  8.5333e-02, -1.6124e-01, -1.9129e-02, -4.1888e-02,
         3.9146e-02,  2.7978e-02,  3.3542e-01,  9.7322e-02,  8.5110e-03,
        -2.9141e-01,  5.9391e-02, -2.4756e-01,  7.3502e-01,  1.4913e-01,
         1.4661e-02,  4.3095e-02, -2.3705e-01, -5.3990e-02,  2.5585e-01,
         7.1113e-02,  5.0730e-02, -1.3734e-01, -1.3255e-01, -1.5649e-01,
         2.2977e-01, -8.2677e-02,  1.7001e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5093e-03,  2.1382e-01,  2.8302e-04, -1.9950e-04, -8.8316e-04,
        -1.4095e-03,  4.3793e-04, -1.7955e-04,  3.5508e-04,  8.9846e-05,
        -6.3920e-04, -2.3471e-04, -3.3045e-04, -5.9921e-04, -5.5113e-04,
         4.7441e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.4309e-01, 2.1859e+01, 1.4444e-01, 1.6765e-01, 1.5436e-01, 3.6835e-02,
        2.8299e-01, 1.8138e-02, 9.0125e-03, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4856e-03,  2.5217e-01, -1.0600e-03, -6.1957e-04,  1.7513e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7714e+00,  1.2403e+02,  3.1155e-01,  1.0864e-01, -8.6581e-02,
        -6.3581e-02,  1.2294e-01,  8.1758e-02, -1.2490e-02,  8.0842e-01,
         9.1591e-01, -2.3636e-01,  8.3411e-02,  1.1023e-02,  1.0730e-01,
         1.2801e-01, -9.6624e-02, -2.6358e-01,  6.0739e-02,  4.9164e-01,
        -1.5067e-01,  3.4931e-01, -1.7311e-01,  1.3862e-01,  1.0382e-01,
         4.0188e-03,  5.3849e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1887,  7.0130, -0.0524,  0.0355,  0.0490,  0.0646,  0.0541,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4542e-01,  8.2614e+01, -1.7857e-01,  2.1704e-01,  1.5548e-01,
         1.1264e-01, -7.0218e-02,  6.0207e-02,  1.2151e-02,  3.3298e-01,
         1.9464e-01, -4.0189e-01, -7.2781e-02,  1.1835e-01, -1.3246e-01,
        -5.0220e-03,  4.7450e-01,  2.4507e-01,  5.5309e-02, -9.3719e-03,
         1.3574e-01,  8.3460e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0455e-04,  1.8414e-01, -2.8985e-03, -6.7535e-04,  3.9448e-04,
         1.7551e-04,  2.3592e-04, -9.3039e-05, -6.8360e-06, -4.4406e-04,
        -8.6481e-04,  5.1521e-04, -7.9131e-04,  1.0412e-03,  1.1656e-04,
         1.2556e-04,  7.4212e-04,  1.3748e-04, -1.2013e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0292e-02,  7.4748e-01, -5.2306e-03,  8.6467e-04,  1.7803e-03,
         3.7165e-03,  2.8576e-03, -2.1137e-03, -1.9905e-04,  1.0158e-03,
        -8.9150e-04,  8.9112e-04,  3.2006e-04,  1.1458e-03,  3.2121e-04,
        -1.0174e-04, -7.8916e-04,  4.5875e-04,  1.3203e-05,  2.6276e-04,
        -1.5921e-04,  4.7931e-04,  2.3075e-04,  1.7673e-03, -1.0109e-03,
         6.3076e-04, -7.9884e-04, -1.6750e-04,  1.2120e-03, -6.4981e-03,
         2.8471e-03,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-3.2252e-03,  5.2023e-01, -1.4838e-04, -1.3714e-04, -6.1494e-04,
        -1.0110e-03,  6.9186e-04,  1.2818e-04,  7.1442e-04, -4.7876e-04,
        -7.7145e-04, -5.8877e-04, -9.1711e-04, -7.0944e-04, -2.1972e-03,
        -1.1508e-03, -1.5994e-03,  1.3547e-04, -7.5120e-04, -3.7733e-04,
        -6.0417e-04,  2.0089e-05,  8.7451e-04, -1.7989e-03,  2.4993e-04,
        -6.3096e-04, -1.8301e-03, -4.3010e-04,  2.2094e-05, -1.0654e-03,
        -1.9696e-04, -4.7892e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3510e-04,  1.5361e+00, -7.9102e-03, -2.7601e-03,  4.5586e-03,
         1.1993e-03,  2.9320e-03,  5.1092e-03,  2.8593e-03,  1.0098e-03,
        -9.3050e-04,  6.9631e-03, -3.1745e-03,  3.4722e-03,  1.2260e-03,
         6.0477e-04,  1.5890e-03,  3.7292e-05,  2.7389e-03,  2.6947e-03,
        -5.6442e-03, -1.2252e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9277e-02,  7.6078e+01,  1.7842e-01,  1.4675e-03, -2.5302e-02,
         2.3852e-01,  1.6446e-01,  4.1108e-02,  1.3428e-01, -1.4006e-02,
        -2.4995e-02, -1.6850e-01,  1.9981e-01,  1.1696e-02,  2.0120e-02,
        -4.6230e-01, -2.1013e-01,  3.9155e-01, -2.8194e-01,  1.6958e-01,
         4.4222e-02, -1.9919e-01, -5.7216e-02, -2.3948e-01, -7.9442e-02,
         8.2893e-02,  1.1756e-01, -2.8639e-01,  6.5526e-02,  3.3584e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7302e-04,  1.8456e-01, -7.8059e-04,  8.0712e-05, -1.9310e-04,
         2.7541e-04, -8.8563e-04, -3.1420e-04,  1.9662e-03, -8.0949e-04,
         5.0160e-04, -1.3717e-04, -1.7921e-04, -1.2042e-04, -2.8883e-04,
        -3.8821e-04, -4.0224e-04, -1.6926e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4823e-03,  6.9871e-01,  1.6009e-02, -1.1572e-03, -3.2165e-04,
        -2.1104e-03, -5.7175e-04,  9.6029e-04, -1.7587e-03,  1.7669e-04,
        -3.3369e-04, -9.7489e-05, -5.1229e-04,  3.7113e-04,  9.7771e-04,
         1.5334e-03,  1.7562e-03,  2.9007e-04, -6.5012e-06,  1.2616e-05,
        -1.7911e-04, -3.5929e-03,  3.0490e-03, -1.3218e-03,  2.3140e-03,
        -8.8807e-04,  3.2617e-03,  2.9001e-04,  2.4922e-04,  4.5546e-04,
        -4.1679e-05,  3.8638e-04,  9.9224e-04, -1.6151e-03,  4.2994e-04,
         1.4060e-03,  5.3146e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5599e-01,  2.0347e+01, -1.2710e-01,  5.5870e-02,  1.2820e-02,
         4.6935e-02,  2.0363e-02, -2.7951e-04, -2.7089e-02, -4.9992e-02,
         3.0295e-03, -2.8622e-02,  8.3812e-03,  4.8111e-02,  3.0231e-02,
         1.4880e-02, -6.9961e-02, -2.7439e-02, -1.3032e-02,  5.5158e-03,
        -2.3651e-02, -3.9186e-02,  7.1655e-02, -1.1915e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1817e-02,  9.6125e+00, -2.9053e-02, -2.8129e-02, -8.4883e-03,
         2.7762e-03,  1.3880e-03, -4.1707e-03,  8.0856e-03,  5.0187e-03,
         9.7158e-03,  8.1544e-04,  6.8130e-03, -9.1569e-03,  2.2126e-03,
        -1.1292e-02,  1.0710e-02, -5.8575e-03, -9.6527e-03, -1.0708e-02,
         8.6274e-03, -1.4663e-02, -6.3226e-03, -1.5302e-02, -1.3734e-02,
        -4.8465e-03,  6.9600e-03, -3.5093e-03, -2.0263e-03, -9.3199e-03,
         3.5607e-02, -1.4867e-02, -4.6763e-04,  7.6436e-03, -5.7497e-03,
        -1.9235e-02, -1.3702e-02,  5.0048e-03, -2.6988e-03, -2.9182e-02,
         5.6387e-03, -6.4176e-03,  4.6514e-03,  1.4368e-02, -9.9029e-03,
         3.5879e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4878e-03,  3.5696e-01, -1.0767e-03, -7.2765e-04, -8.8394e-05,
         7.9363e-04, -7.7725e-04,  6.1761e-06,  2.1174e-04, -2.8509e-04,
         4.5327e-04, -1.0788e-04, -3.7626e-04,  5.8746e-04,  7.0075e-04,
        -8.2346e-04,  6.0917e-04, -6.5939e-04,  4.6208e-04, -1.5979e-03,
         1.1696e-04,  2.4498e-04,  1.0278e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2658e-01,  1.5694e+01,  2.3073e-01, -3.7087e-02, -1.2956e-02,
         3.1658e-02,  8.1068e-02,  2.1593e-02,  5.3072e-02,  7.6835e-02,
         2.3653e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0117,  0.9047, -0.0105,  0.0050,  0.0038, -0.0033, -0.0024, -0.0012,
         0.0042,  0.0019,  0.0029,  0.0016,  0.0023, -0.0016,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7737e+00,  2.4181e+02, -6.3897e-01, -5.8418e-01, -4.6800e-01,
         1.4508e+00,  3.8661e-01,  9.7589e-01,  7.5946e-01, -3.0601e-02,
         8.3048e-01, -5.8626e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2149e-02,  2.1372e+00, -3.1509e-03,  1.0258e-02,  3.9496e-03,
         5.7831e-03, -4.3492e-04,  3.2222e-03,  1.5211e-03,  3.8421e-03,
        -9.9837e-05,  3.3182e-03,  2.4226e-03,  4.0741e-03, -1.1232e-03,
         1.2762e-03,  2.6129e-03, -1.6245e-03,  5.4067e-03, -3.4915e-03,
        -6.0703e-04,  3.5809e-03,  8.2952e-03, -5.5746e-04,  9.0276e-04,
         9.5426e-04,  1.1211e-02, -1.4249e-03, -4.1321e-04,  4.3135e-03,
        -3.5311e-04,  8.9816e-03, -4.6027e-03,  1.8803e-03,  3.8553e-03,
        -2.7011e-03, -2.2326e-03,  2.2905e-03, -6.4126e-04, -3.0013e-04,
        -1.1647e-03,  1.0010e-03, -4.1587e-04, -1.2757e-03,  1.6155e-03,
        -2.0775e-03,  1.7246e-03, -3.6430e-04,  3.0276e-04,  3.0157e-04,
         1.3973e-03, -5.7310e-04, -2.8335e-03, -4.8445e-03, -4.6808e-03],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-2.6509e+00,  1.3653e+02, -3.7159e-01, -2.8483e-01,  2.0866e-01,
         2.1700e-01, -2.3100e-01,  2.3719e-01,  4.0456e-02, -1.3201e+00,
        -7.6606e-01,  5.3808e-02, -4.2791e-02, -5.8022e-02,  3.9869e-01,
         3.2064e-02, -1.3680e-01,  1.8503e-01,  2.6127e-01,  7.1548e-02,
        -8.0735e-02,  3.2827e-01,  7.6819e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6476e-03,  1.7540e+00,  1.5286e-03,  5.6235e-05,  1.2633e-03,
         2.4642e-03, -8.6234e-04, -8.2849e-04, -1.4440e-03, -7.2303e-04,
        -2.4052e-03,  2.7739e-04, -2.2109e-04,  2.6223e-03,  2.3349e-03,
        -1.7617e-03, -3.6401e-03, -1.3265e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5386e-02,  5.5967e+00,  5.6132e-02,  6.9577e-02,  5.1841e-02,
         2.8157e-03,  7.9628e-03,  2.7420e-02,  5.9037e-03, -1.7003e-02,
        -3.8022e-02,  1.2595e-02, -2.4370e-03, -1.2548e-02,  1.8821e-02,
         2.0313e-02,  3.8837e-02, -8.7365e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3256e-03,  3.8944e+00, -8.3743e-03,  1.1739e-02, -3.2973e-02,
        -1.3647e-02,  2.1606e-02, -2.1530e-02,  6.2670e-03,  1.1528e-02,
        -3.8490e-03,  1.8166e-02,  2.1833e-02, -4.8349e-04,  2.1846e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2910e-04,  5.9981e-02, -7.3983e-05, -8.2853e-05,  1.6949e-04,
        -2.3837e-04,  3.4338e-05, -8.4047e-05, -3.6413e-05,  3.5698e-05,
         7.3622e-05, -4.2801e-05,  1.1670e-04, -4.3366e-06, -3.2941e-04,
        -3.1283e-05, -8.1605e-06, -9.9501e-05,  1.8612e-05,  1.1842e-04,
         1.4863e-04, -1.1348e-04, -4.1167e-05, -8.9193e-05, -2.8067e-04,
        -5.6613e-05,  3.9694e-05, -3.3651e-05, -2.5952e-04,  1.5627e-04,
        -9.2723e-05, -1.1137e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7921e-04,  1.4225e-01, -1.3314e-03,  3.7449e-04,  2.9850e-04,
        -1.5123e-05, -7.3878e-05,  1.2709e-04, -1.5060e-04, -3.7866e-04,
         2.6217e-04, -1.6151e-04, -3.5489e-05,  4.2475e-05, -2.4971e-04,
         9.6754e-05, -1.1165e-04,  2.6546e-04,  4.7577e-04, -2.3658e-04,
         2.2442e-05,  4.9293e-04, -6.2460e-04, -1.5499e-04, -1.0249e-04,
        -7.1684e-05,  5.7892e-05,  1.5232e-04,  3.1685e-04,  1.2810e-04,
         1.0043e-04,  8.3901e-04,  7.1788e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1572e-01,  1.3240e+01, -8.8569e-03,  1.1790e-01,  1.4250e-02,
        -3.9187e-02,  1.3948e-02,  8.7927e-03,  3.3114e-02,  1.5959e-02,
         8.1429e-03,  3.0484e-03,  1.3902e-02,  1.1498e-02,  9.6055e-03,
        -1.4462e-02,  7.1189e-03, -1.5068e-02,  4.3277e-03, -6.6847e-03,
         6.3470e-03, -1.1699e-02,  4.3309e-03,  1.9201e-02,  5.2564e-03,
        -7.4118e-04,  1.1330e-02, -5.2243e-03,  2.2663e-03, -1.1488e-02,
         1.3053e-02,  3.4059e-03,  2.9928e-03, -4.6625e-03,  6.2170e-03,
        -8.6186e-03, -7.6742e-03,  7.7549e-03,  9.9172e-03,  3.9805e-03,
         6.8048e-03, -8.4728e-03,  3.1182e-03,  2.0400e-02, -3.6281e-03,
         1.6875e-02,  1.6960e-03,  1.3452e-05, -3.0917e-03,  8.6713e-04,
        -3.5307e-03,  8.0653e-03,  1.3642e-02,  1.6303e-03,  1.6551e-02,
         3.3174e-02,  1.2273e-02,  6.3194e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3030e+00,  1.6769e+02, -5.9446e-01, -3.3520e-01, -4.2732e-02,
        -2.0786e-01, -1.9887e-01, -1.5329e-01, -1.4070e-01, -2.3257e-01,
         3.7452e-02,  1.1351e+00,  8.4934e-02,  4.1915e-01, -6.4013e-01,
        -1.4511e-01,  2.3031e-01, -1.8251e-02, -8.8953e-02, -2.6317e-01,
        -1.4922e-01, -1.9926e-01, -4.0779e-01, -4.6993e-01, -1.3104e+00,
        -1.1981e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9167e-03,  3.8138e-01, -2.8314e-03, -8.9071e-04, -3.8491e-04,
         1.1309e-03,  3.0726e-04,  1.2154e-03,  1.1150e-03, -4.0434e-05,
         1.2358e-03,  2.9860e-04,  1.6132e-04,  5.4279e-04, -6.4572e-05,
         6.8820e-04,  8.3219e-05,  5.7449e-05, -2.0046e-04, -2.6069e-04,
         2.6024e-04, -1.1669e-03,  1.3903e-03, -1.9564e-03, -2.5846e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0644e-03,  7.1314e-01, -1.0193e-02, -2.6168e-03,  2.3805e-03,
         1.8348e-04, -8.4196e-04, -6.9433e-04, -2.9522e-04, -6.1996e-04,
         8.0059e-04,  3.4328e-04, -9.3486e-04,  1.8587e-03,  8.7603e-04,
        -1.3751e-03,  1.5622e-04,  1.9588e-04, -4.9581e-05,  1.3487e-03,
         1.0063e-04,  9.9190e-05,  1.1165e-04, -3.5961e-04,  5.3829e-04,
        -1.6173e-03,  1.3576e-03, -1.2462e-04, -2.5946e-04, -3.6977e-04,
         1.0623e-03, -4.3081e-04,  1.8241e-03, -3.3746e-04,  1.8066e-03,
        -2.0904e-03,  1.8448e-04, -8.5643e-04,  6.1861e-04,  1.7540e-03,
        -6.9427e-04,  4.4525e-04, -3.9998e-04,  1.1504e-03,  5.4189e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2254e-04,  2.1703e-02,  1.9858e-04, -9.3209e-05,  1.0875e-05,
        -1.4683e-04, -7.7561e-05,  2.2193e-06,  2.6057e-06, -1.3740e-04,
        -1.4469e-04, -1.5625e-05,  2.1588e-05,  8.3708e-06, -3.9210e-05,
         5.2303e-05,  6.2739e-05, -2.6873e-06, -7.1500e-06, -2.2742e-05,
        -5.1107e-05, -1.8222e-05,  6.4491e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2140e-02,  1.4252e+00,  2.0126e-02,  3.7550e-03,  5.2224e-04,
        -9.3407e-03, -1.6665e-02, -2.8999e-03, -5.1038e-03, -1.5942e-03,
         1.3872e-02,  1.2789e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-6.4389e-03,  6.5443e-01, -4.9712e-03, -1.0656e-03, -2.6626e-04,
        -1.2686e-03, -3.7316e-03, -2.0876e-04,  1.0170e-04, -1.5460e-03,
         1.0133e-03, -1.4432e-03, -3.3848e-03,  1.7565e-04, -1.0004e-03,
        -1.8246e-03, -1.7199e-03,  1.5056e-03, -5.4568e-05,  4.8974e-04,
        -6.5665e-04, -3.0271e-04, -1.2559e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1193e-03,  1.7064e-01, -2.1340e-05, -1.6277e-04, -1.0554e-04,
        -8.0178e-04, -2.1728e-04,  1.2390e-04, -3.2392e-04,  3.6846e-04,
        -4.4733e-05, -5.8959e-05, -2.2999e-05,  1.6898e-04,  6.3993e-05,
         1.7215e-05,  3.7471e-04, -2.3349e-04, -2.3869e-04, -6.1575e-05,
        -1.0250e-04, -4.1836e-04, -1.9898e-05, -2.7737e-04, -1.5820e-05,
        -5.4854e-05, -2.8380e-04,  1.0728e-04, -1.4503e-04, -9.7174e-05,
         1.6196e-05,  4.3740e-04, -9.6209e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7343e-04,  3.0575e-01,  4.3754e-04, -2.4534e-03, -8.3840e-04,
        -6.0363e-04, -1.1842e-05, -1.5310e-04,  8.6408e-05, -7.3306e-04,
        -5.3130e-04, -4.3229e-04,  2.2743e-04, -8.7768e-04,  7.9151e-04,
         4.0347e-04,  1.0106e-03, -6.5960e-04, -7.7769e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9973e-03,  1.4393e-01,  6.2253e-04, -7.2568e-04, -6.7795e-04,
         1.0813e-04,  3.2837e-05, -3.9979e-04, -2.1907e-04,  2.1117e-04,
        -6.3745e-04, -6.3115e-05,  2.5177e-05,  1.6895e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7863e-03,  1.1589e+00, -1.4238e-02, -5.8291e-05,  1.4716e-03,
        -3.2533e-03, -1.9968e-04,  7.5704e-04, -1.1328e-03,  3.1302e-04,
        -6.0884e-04, -3.2041e-04,  1.3606e-03, -2.6320e-03,  1.0731e-03,
         6.4221e-04,  1.9226e-04,  1.0445e-03, -1.1065e-03, -1.4644e-03,
         1.6029e-03,  1.1807e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2132e-02,  7.7163e+00,  7.1716e-02, -7.0988e-03, -1.7159e-03,
        -9.3387e-04,  3.4210e-02, -2.4462e-04, -1.5401e-02,  6.0805e-03,
         6.5747e-03, -1.2987e-03, -2.7457e-02, -1.4129e-03, -3.9186e-03,
        -4.4454e-02, -1.0662e-02,  1.1905e-02, -3.1612e-03,  5.4123e-02,
         1.1148e-03, -5.5700e-03,  1.0405e-02, -7.4882e-04, -3.6648e-03,
        -1.2196e-02,  3.0083e-03, -3.8584e-02, -2.2301e-02,  5.8726e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1983e-03,  1.8953e-01, -5.7741e-04, -5.4007e-04, -6.9680e-05,
         6.3695e-04, -8.0532e-04, -5.2212e-04,  3.0327e-04, -1.6373e-04,
         1.0207e-04,  7.0926e-04, -9.8367e-05, -3.6275e-04, -3.4332e-04,
        -2.1730e-04, -1.2246e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4510e-03,  2.7150e-01,  2.9327e-03,  5.9357e-04, -1.4358e-03,
        -1.6086e-04, -3.9983e-04,  1.2402e-03, -6.1446e-04, -4.2534e-04,
         3.0089e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7537e+00,  7.8895e+01,  2.7270e-03, -2.8390e-01, -2.9788e-02,
        -2.1308e-01,  3.4894e-01, -6.4285e-02,  8.6738e-02,  1.0326e-01,
        -3.7131e-01, -4.1387e-02, -3.1632e-01, -4.2722e-01, -1.9324e-01,
        -4.4029e-01, -2.8962e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0941e-03,  1.7276e-01, -3.6644e-04,  1.2619e-03,  3.6220e-04,
        -6.6689e-04,  2.5370e-04, -3.2326e-05, -2.5926e-04,  3.2319e-05,
         2.1965e-04,  4.2815e-04,  3.7698e-04, -7.9973e-04,  3.0290e-04,
         2.6261e-04,  9.8817e-05,  2.1202e-04,  3.8418e-04,  1.2609e-03,
         3.4588e-04, -2.1413e-04,  4.8639e-04,  2.7726e-04, -3.3042e-04,
         5.3959e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3348e-02,  9.2835e-01,  1.4810e-02, -3.0785e-03,  3.2775e-03,
        -1.9147e-03,  4.6352e-03,  3.5164e-03, -2.7235e-03,  5.7749e-03,
        -2.6080e-03,  3.4891e-03,  1.7613e-04, -2.5726e-03,  2.1680e-04,
         1.9640e-03, -9.4311e-04, -5.3054e-03,  1.8985e-03,  1.8010e-04,
        -1.5052e-03, -2.8703e-03, -2.9924e-04, -6.1856e-03,  2.1357e-03,
        -2.6422e-03,  5.5892e-04, -7.6112e-04,  1.4641e-02, -6.3750e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1532e-01,  7.6469e+01,  7.7405e-01,  7.9114e-02, -6.8284e-02,
        -1.4113e-01,  4.5112e-01,  4.0762e-01, -2.1758e-01,  6.3852e-02,
         2.0162e-01,  5.7827e-01,  7.6604e-03,  3.2719e-01, -2.1228e-01,
         7.2321e-02, -4.2831e-02,  8.5911e-02, -2.6000e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.0784e-03,  9.2228e-01, -6.3870e-04,  4.1015e-03,  5.1575e-03,
        -2.2620e-04,  5.5018e-03,  3.1704e-03, -3.7033e-03,  1.3011e-03,
         2.7682e-03,  3.7415e-04,  8.6135e-04,  3.3628e-03,  9.8583e-04,
        -8.2985e-04,  3.3433e-03, -4.3414e-03,  1.3557e-03,  8.3974e-04,
        -7.8055e-04,  8.8939e-04, -1.0726e-03,  2.4711e-04, -4.7851e-04,
         3.0465e-03,  6.9866e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8579, 13.4624, -0.3599, -0.0489,  0.0656,  0.0216, -0.1683,  0.0339,
        -0.2664, -0.0498,  0.0369, -0.1862, -0.1081, -1.2920,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0299e-03,  4.9324e+00, -4.8936e-03, -5.4791e-03, -1.0642e-02,
         2.2690e-02, -9.8723e-03,  2.8265e-03, -1.7657e-02, -5.5165e-03,
         6.4789e-03, -1.5102e-03, -1.8757e-02,  6.2741e-04, -9.5291e-03,
        -1.6626e-02, -1.2774e-02, -2.7377e-02, -5.0568e-03, -5.4666e-03,
        -5.2514e-03, -7.0188e-03, -1.7639e-03, -8.7435e-03, -7.2165e-03,
        -3.6080e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6342e-02,  3.2121e-01,  3.6131e-03,  1.9717e-03,  5.8625e-05,
        -2.0314e-03,  1.4050e-04, -3.7590e-04, -9.6269e-04, -5.4416e-04,
        -4.0461e-04,  2.7631e-03, -3.5045e-03,  7.7401e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2497e-02,  1.8189e+00, -7.2815e-03, -1.4834e-03,  1.5585e-03,
        -6.0634e-04,  2.4752e-03,  2.1630e-03,  5.1819e-03,  1.9713e-05,
        -1.0675e-03,  8.3269e-04,  1.1155e-03, -4.6034e-04,  6.3713e-03,
        -1.2676e-03,  1.5775e-03, -1.6231e-03, -1.8440e-03,  1.3000e-02,
         1.2946e-03,  7.6158e-03, -4.1522e-03, -5.9413e-03,  2.3014e-03,
        -6.1760e-04, -1.4087e-02,  2.8498e-03,  1.7108e-04, -7.8326e-04,
         1.1834e-02,  5.4949e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0399e-01,  8.9895e+00,  7.5963e-03, -3.4142e-02,  1.6800e-02,
        -1.1182e-02, -3.7751e-02, -7.0459e-03,  2.3740e-02, -1.4038e-02,
        -2.4733e-02, -9.3782e-03, -1.4815e-02, -1.4922e-02, -4.4607e-03,
        -3.8733e-02, -9.2206e-03,  8.8153e-03, -1.7843e-03, -2.4702e-03,
         6.7604e-03, -8.9584e-04, -8.7638e-04,  4.1097e-02, -2.4006e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3093e-02,  6.1964e+00, -1.3365e-01, -4.4181e-02,  1.3864e-03,
        -9.3715e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2286e-03,  1.1661e-01,  3.6126e-04, -6.0664e-04, -1.2240e-04,
        -3.0632e-04,  2.2022e-04, -4.1972e-05, -8.1259e-05,  1.3372e-04,
        -5.2577e-05, -9.3967e-05, -3.5931e-05, -1.5361e-04, -2.2792e-04,
        -9.9609e-05, -2.5546e-04, -2.2999e-04, -3.6359e-05,  4.3435e-05,
        -1.5448e-04,  2.9377e-05, -3.1939e-04, -9.4178e-05, -1.0097e-04,
         3.3796e-04,  6.2792e-05,  2.8473e-04,  1.8132e-04, -2.8943e-04,
         2.0757e-04, -2.1254e-04, -5.4076e-05,  1.0530e-04,  1.6002e-04,
        -1.3721e-04, -2.4933e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2327e+00, -1.6776e+02, -1.7721e+00, -1.6243e+00, -2.8628e-02,
         6.9139e-02, -2.0542e-02,  7.9832e-01,  4.9386e-01, -4.5427e-02,
         8.8715e-02, -3.2769e-03,  3.1081e-01,  4.6481e-01,  9.9979e-01,
         3.4056e-01,  4.9519e-01, -5.0162e-01,  3.0115e-01, -7.6647e-02,
         5.5159e-01, -2.2597e-01,  5.4834e-01,  4.4575e-01, -1.0523e-01,
         2.1759e-01,  3.6782e-01, -9.2555e-02,  2.7547e-01, -5.0729e-01,
         1.4491e-01, -3.9728e-02,  1.1650e-01,  3.2548e-01, -4.0209e-01,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1963e+00, -1.1545e+02, -2.2099e+00, -1.2884e+00, -4.4608e-01,
         7.9261e-01,  1.3997e+00, -2.1017e-01, -2.7956e-01, -1.0395e-01,
        -2.1666e-01,  3.3010e-01, -8.5162e-01, -4.3135e-01,  1.1359e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0218e-02,  1.2382e+00, -1.5519e-02, -2.7893e-03,  1.4146e-03,
        -1.8591e-03, -6.0624e-04, -4.3044e-04, -9.5760e-04, -1.8385e-03,
        -9.0331e-04, -1.8590e-04, -4.2782e-04, -1.0689e-03,  6.2918e-04,
         2.1164e-03,  4.8062e-04,  9.4459e-04,  5.2595e-04,  1.8482e-03,
         9.3979e-04, -1.4146e-03, -3.5976e-03,  2.2883e-03,  4.0408e-03,
        -2.0510e-04,  8.7546e-04,  4.4905e-04,  7.1006e-05, -1.8921e-03,
        -5.6423e-05,  1.2594e-04,  4.7142e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2872e-02,  6.8351e+01, -1.4059e-01, -1.1428e-01, -2.7239e-02,
        -2.2361e-01, -3.5301e-03,  2.6617e-01,  3.9594e-01,  1.1489e-01,
         3.2452e-01,  2.7772e-02,  1.0502e-01,  3.2751e-02, -3.3273e-03,
         1.1457e-02,  1.9212e-02, -5.3292e-02, -3.6060e-02,  9.8672e-02,
        -1.1196e-02, -2.9122e-03,  1.5532e-02,  3.4324e-02,  4.0098e-02,
         8.7890e-03,  1.5549e-01,  9.6132e-02,  1.8192e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-4.0181e-02,  1.5415e+01, -1.8399e-02, -4.9550e-02, -9.5343e-03,
        -1.7704e-02, -7.0564e-04, -3.4764e-02,  1.3084e-02, -5.4045e-02,
        -1.5912e-03,  3.2136e-02,  2.4063e-02, -4.2994e-02,  9.6475e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9904e-04,  8.1806e-01,  1.4369e-03,  1.1387e-03, -4.0143e-04,
         7.2698e-04,  5.5673e-04, -2.0823e-03,  7.0039e-04,  5.4831e-04,
         1.6339e-05,  4.1618e-04, -8.7608e-04, -3.6869e-04, -8.5548e-04,
        -2.7328e-03,  5.8602e-04, -3.0309e-04, -2.2618e-04, -5.4428e-04,
        -5.7387e-04,  1.4076e-04, -1.1853e-03, -2.2122e-04, -5.9961e-04,
        -4.9947e-04, -2.9513e-04,  1.0253e-04, -3.5068e-04, -2.1498e-04,
        -1.0150e-03, -2.9021e-04, -3.7626e-04, -6.0563e-04, -5.5599e-04,
         6.8099e-05,  5.2481e-04, -1.9190e-04,  4.1266e-04, -8.0139e-04,
        -1.3384e-04,  4.4368e-04,  8.3246e-04, -1.7107e-04, -5.0405e-04,
        -9.8931e-04, -5.6547e-04,  1.0913e-04, -1.6229e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3137e-01, -1.0289e+02, -4.2875e-01, -5.6732e-01, -7.7604e-01,
         1.2059e-01, -3.0353e-01, -3.3947e-02, -2.2219e-01, -2.8079e-01,
         2.5855e-01, -2.7584e-01, -3.0266e-02, -3.4411e-02,  4.4479e-02,
        -2.6940e-02, -3.1246e-01, -3.7277e-02, -2.7702e-02, -1.7294e-01,
         1.4729e-01, -4.4310e-02, -1.1617e-01,  1.1279e-01,  1.5646e-02,
         2.7261e-01,  3.2966e-01, -2.4089e-01, -1.3539e-01,  2.6076e-01,
         6.4342e-02,  3.6419e-02, -1.1714e-01,  3.6879e-01, -3.5043e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3187e-01,  8.9918e+01, -1.8752e-01, -4.8863e-02, -9.1683e-02,
         3.4046e-01, -3.9735e-01,  1.3883e-01,  2.2376e-02,  5.9684e-02,
        -1.0281e-01,  1.0204e-01, -1.2309e-01,  3.4118e-02, -1.6059e-01,
        -1.1407e-01,  3.7929e-03, -7.7869e-02,  5.0138e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3016e-04,  2.4431e-01, -3.0932e-03, -1.8589e-03, -9.4431e-04,
         1.8769e-03, -1.0616e-03,  3.7693e-04,  2.7420e-04,  1.6525e-04,
         2.0178e-03, -4.1865e-04,  7.3884e-04, -2.1430e-03,  9.2202e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7654e-04,  3.2891e-01,  1.1458e-03, -5.6789e-04, -9.6468e-04,
        -7.7950e-04,  4.5189e-04,  5.8941e-04, -7.6848e-04, -5.2803e-05,
        -7.0383e-04,  7.6302e-04,  7.3845e-04, -1.6337e-04,  2.6823e-04,
        -7.8365e-04, -4.2764e-04, -4.3312e-04, -4.6882e-05,  2.2083e-04,
         6.7231e-04,  6.2317e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8991e-04,  4.0491e-01, -5.4652e-03,  1.7933e-04, -3.7598e-04,
        -1.1694e-03, -1.0662e-03, -2.4267e-04, -1.1212e-03, -8.8071e-04,
         4.5457e-05,  9.9786e-04,  2.2730e-04, -7.9358e-05,  1.5670e-04,
         6.3625e-04, -5.1078e-04, -3.6113e-04,  5.2304e-04,  5.5621e-04,
         9.8611e-04, -1.0784e-04,  4.1348e-05,  2.3023e-05, -5.4837e-04,
         2.4749e-04,  4.6833e-04,  2.1199e-04,  1.0065e-03,  1.6997e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5591e-02,  2.7862e+00, -1.0544e-02, -9.1138e-03, -2.9884e-04,
        -6.8526e-03, -1.5545e-02, -1.9316e-02, -4.0219e-03, -1.9122e-02,
        -6.3622e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6301e-04,  1.6151e-01, -4.5604e-04, -3.6033e-04, -1.1036e-04,
         2.0713e-04,  9.0144e-04,  3.9299e-04,  2.4018e-04,  3.6097e-04,
         1.6047e-06,  1.3154e-03, -1.2045e-04,  2.2703e-04, -5.3788e-04,
         5.7497e-04, -2.6571e-04,  9.8785e-05,  2.2940e-04, -1.2888e-04,
        -7.8158e-04, -3.4748e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9190e-03,  8.2169e-01,  3.2295e-03, -2.0875e-03,  1.4850e-03,
        -1.2265e-02, -1.7627e-03, -1.3173e-03, -1.3545e-03, -1.8388e-03,
        -9.2050e-04, -9.5007e-04, -2.2927e-04, -8.8391e-04,  2.5735e-04,
        -7.1058e-04, -1.4540e-03, -9.8652e-04, -4.4471e-03,  3.3526e-04,
        -7.7260e-04, -1.2249e-03, -6.8324e-03,  7.4409e-04,  1.7968e-03,
         5.2862e-04, -7.3701e-04,  1.3620e-04, -2.2762e-03,  7.3061e-04,
         4.4178e-03,  1.4076e-03,  7.1349e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8196e-03,  8.6572e-01, -2.3654e-04, -2.6753e-03,  1.3117e-04,
        -3.5448e-04,  3.5661e-03, -5.5955e-03,  3.7227e-03, -8.8579e-04,
        -1.5192e-03,  4.8965e-04,  2.2984e-03,  1.1352e-03, -1.0795e-03,
         1.5631e-03,  9.4566e-04,  8.8812e-04,  1.3955e-03,  2.8281e-04,
         2.4304e-03, -3.4422e-03,  3.9216e-04,  9.6379e-04,  7.1877e-04,
         1.9298e-03,  1.9221e-03,  5.1044e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4358e+00,  2.0629e+02, -1.3249e-01,  2.2478e-01, -8.4819e-01,
         6.2349e-01, -2.3946e-01,  6.5264e-01, -1.3047e+00,  5.0748e-01,
        -7.9530e-01, -2.3340e-01,  1.3245e-01,  1.6018e-01, -5.8530e-02,
         1.7173e-01,  4.9397e-01, -1.6966e-02,  1.6920e-01, -1.4743e-02,
         2.6001e-01,  5.4116e-02,  7.4862e-01,  8.3667e-02,  4.2011e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.4973e-02,  3.9193e+00,  4.2305e-03,  4.3047e-02, -1.3047e-02,
         9.5367e-03, -9.4816e-03, -7.0764e-03, -2.3855e-03, -6.3805e-03,
         6.7936e-03,  7.9121e-03,  2.3061e-05,  2.2151e-02, -6.2026e-03,
        -1.6758e-02, -4.1922e-05,  3.2917e-03, -4.0199e-03, -7.6964e-03,
         7.8718e-03, -5.9913e-03, -7.8285e-03,  6.1016e-04, -5.8402e-03,
        -2.3801e-03,  2.7761e-03, -5.2699e-03, -4.1122e-03, -8.5490e-03,
        -3.2286e-03,  6.5891e-04, -2.1684e-02,  1.8086e-03,  1.7154e-03,
         1.6092e-03, -2.1186e-03,  4.6706e-03,  2.8063e-03, -4.3673e-03,
         1.4532e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7904e-03,  1.4692e+00,  4.0342e-03, -3.4002e-03,  1.4215e-03,
         2.3314e-04,  3.0707e-03,  3.6822e-03, -7.6289e-04,  4.3792e-04,
        -1.5596e-03, -4.7379e-03,  1.9892e-03, -3.7843e-04,  9.4690e-03,
        -4.5942e-03,  2.7519e-03,  4.3759e-03,  1.6400e-03, -1.3005e-02,
        -1.8619e-03, -3.2100e-04,  1.2572e-03,  1.8983e-03,  1.4781e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1112e-01,  3.0542e+01,  2.1446e-02,  5.8047e-02,  6.3080e-02,
        -6.0913e-03, -6.7837e-03,  1.6529e-02,  2.6435e-02,  6.4895e-03,
        -1.1797e-02,  7.5451e-02, -4.5006e-02,  1.6902e-02, -6.3259e-02,
        -2.0678e-02, -3.9086e-02,  1.1408e-02, -3.8340e-02,  5.2479e-02,
        -2.5732e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7539e-03,  4.7526e-01, -3.5625e-04, -8.3132e-04, -2.5905e-04,
        -9.1946e-04, -1.9010e-03,  1.2977e-04, -8.5971e-04, -1.4988e-03,
        -2.8224e-03, -1.0763e-03, -2.1876e-05, -4.8711e-04, -2.3347e-03,
        -9.2686e-04, -1.6764e-03, -5.0802e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1755e-03,  4.5875e-01,  2.0912e-03, -1.9509e-04, -1.2658e-03,
        -9.7033e-04,  3.4863e-04, -1.3255e-03, -1.2089e-03, -2.8587e-04,
        -1.8199e-04, -7.4441e-05,  1.0591e-03,  1.4856e-03,  2.2882e-04,
         1.6638e-04,  4.9284e-03, -3.1127e-04,  1.2490e-03,  2.3712e-04,
        -2.5691e-04,  1.8993e-04, -4.3594e-04,  7.5305e-05, -1.7548e-04,
        -5.6624e-05, -7.4750e-04,  8.4328e-04, -7.3887e-04,  3.8189e-04,
        -1.3426e-04,  1.6339e-04, -4.4450e-04,  1.1072e-03,  4.3200e-04,
        -5.4707e-05, -1.8682e-03, -1.8649e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9093e-03,  1.4842e+00,  7.4530e-03, -6.5596e-03, -1.5508e-03,
         2.4549e-03, -5.6440e-03, -1.1788e-03, -3.1184e-03, -3.1681e-03,
        -4.2939e-04, -3.2384e-03, -5.4600e-04,  1.5014e-03, -1.1530e-03,
         1.4421e-03,  5.6748e-04,  3.0910e-03,  1.7380e-04, -1.7264e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0211e-03,  2.1339e-01,  5.3857e-05, -3.2486e-04, -4.3649e-04,
         9.8006e-04, -7.8760e-05,  8.2681e-04,  5.6959e-04, -2.1678e-04,
         8.4580e-04, -5.5662e-05, -9.1331e-04, -1.0344e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8653e+00,  7.8185e+01, -1.6498e-01, -2.1551e-01, -5.9722e-02,
         1.1632e-01, -1.1516e-02, -5.4910e-02, -6.7029e-02, -4.7698e-02,
         5.4667e-01,  8.5705e-02, -1.0737e-01,  7.0457e-02, -1.1943e-01,
         1.1897e-01,  1.2852e-01, -1.0126e-02,  9.6580e-02,  1.2200e-02,
        -1.5433e-01,  2.2574e-01,  3.8438e-02, -2.8893e-02, -1.3299e-01,
        -1.8880e-01,  1.2885e-01,  6.9848e-02, -2.4894e-02, -1.9685e-01,
        -1.2416e-02,  5.9341e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0306e-02,  2.4312e+00, -6.3017e-03,  3.9323e-03, -3.2659e-02,
        -8.7766e-03, -6.1409e-03, -8.9661e-03, -1.3250e-02, -1.0239e-03,
        -3.4187e-03, -1.8498e-02,  1.7417e-03,  1.2071e-04, -1.4006e-02,
        -1.7551e-03, -1.0391e-02, -8.4669e-03, -1.0547e-02, -3.5564e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2639e-03,  1.1555e+01, -1.3663e-01, -6.0068e-02,  9.0982e-03,
         2.9933e-02,  6.4606e-02, -5.2292e-03,  1.1742e-02,  2.9062e-03,
        -1.4965e-02,  1.2711e-04,  1.8688e-03, -1.8963e-03,  4.8531e-03,
        -1.6561e-02, -2.3625e-02, -9.6193e-03, -1.6031e-03, -4.2179e-03,
        -7.9111e-03, -1.8010e-02, -4.3404e-03, -2.1267e-03,  1.9028e-02,
        -3.2634e-02, -2.2266e-02, -1.3299e-02, -9.5915e-03,  1.0597e-02,
        -7.5217e-03, -1.5536e-03, -4.3984e-02,  6.4301e-02, -2.7071e-02,
         6.6616e-03,  2.3721e-02, -2.7119e-03,  2.5966e-02,  3.7447e-03,
         5.4023e-04, -1.0357e-02,  4.9161e-03,  5.3415e-03, -4.7735e-03,
        -1.4807e-02, -1.3327e-03,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9264e-02,  4.3284e+00, -7.6379e-03,  3.7249e-04,  2.3518e-03,
        -7.7042e-03,  1.1608e-02,  1.6371e-02,  1.1590e-02,  1.8722e-02,
        -2.6297e-02,  1.8027e-03, -3.6872e-03,  2.6278e-02, -2.8624e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7345e-03,  3.9972e-01, -1.2065e-03,  5.6804e-04, -3.1220e-04,
        -7.4124e-05, -3.3102e-05,  2.0923e-04,  3.4872e-04, -4.7595e-04,
         2.2307e-04, -3.2050e-04, -5.9772e-04,  1.0304e-03, -6.7038e-04,
        -1.0276e-03, -3.5483e-04, -5.9320e-04, -1.2064e-03, -3.9696e-04,
        -3.2286e-04, -5.6243e-04, -1.3134e-04,  1.9871e-04,  5.9663e-04,
        -1.0090e-03,  2.4235e-04, -4.9691e-04,  2.1173e-04,  4.0228e-04,
        -6.9526e-04, -2.7625e-04,  1.2683e-04, -3.3711e-05, -5.7061e-04,
        -8.8943e-05, -2.0610e-04,  2.5156e-04, -4.6042e-04,  1.3498e-04,
         2.5484e-04, -7.7312e-05,  9.8311e-05,  1.4791e-04, -3.1098e-04,
        -6.8831e-04,  1.0625e-03, -8.3624e-04], device='cuda:0',
       grad_fn=<SumBackward1>)]
